diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/config.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/config.json
@@ -0,0 +1,52 @@
+{
+ "architectures": [
+ "GloMeModelForTokenClassification"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "cdr_weight": 0.0,
+ "class_weights": [
+ 0.1,
+ 0.9
+ ],
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.1,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "use_glome": true,
+ "use_nsa": true,
+ "vocab_size": 36
+}
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/model.safetensors b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d9a41ccd9e1f4850445f9b65c983d85f83b449b4
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30c034c905b9a0eea1b7cc3c4c77fbdf290e876b5a00c00f09397921c11013c3
+size 61385376
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/optimizer.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04baeb1ad55086d386c983224a159983ed885fb2
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:725b67bba8a7c8b0286053847c3081378277810c7a29c183c24fbe0871520125
+size 122881658
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/rng_state.pth b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c4421ebb7cc7c9f31acec4968a8f80ff6c4bcc06
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85c805248eed07d14f2eedb8b706397ce25315fc3b96df0bb8479411b871bd7b
+size 14244
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scaler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scheduler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..848ca8d531d2184c668c91c1e4f715a3aa39b2d8
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0f85b1b4566ce3af61937d9c59a498414651a1e667865d284be6f03114163df
+size 1064
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/trainer_state.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..98d3db68475d3e0e8cd2f3ba63f29287ef20fbfb
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/trainer_state.json
@@ -0,0 +1,11961 @@
+{
+ "best_global_step": 8856,
+ "best_metric": 0.7849709563979171,
+ "best_model_checkpoint": "./results/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856",
+ "epoch": 492.0,
+ "eval_steps": 500,
+ "global_step": 8856,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.3401972747610332,
+ "eval_auc": 0.39064302367564674,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25900759435847653,
+ "eval_f1_macro": 0.3321798728791878,
+ "eval_loss": 1.0617570877075195,
+ "eval_pr_auc": 0.1212308124824295,
+ "eval_precision": 0.15736885928393005,
+ "eval_precision_macro": 0.49944165947453734,
+ "eval_pred_class_0": 5256,
+ "eval_pred_class_1": 14412,
+ "eval_predicted_binding_ratio": 0.7327638804148872,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.4991767473782156,
+ "eval_runtime": 0.304,
+ "eval_samples_per_second": 536.239,
+ "eval_steps_per_second": 3.29,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 18
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.341010778930242,
+ "eval_auc": 0.39081343973238586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2590750585948665,
+ "eval_f1_macro": 0.33285209231533375,
+ "eval_loss": 1.0604556798934937,
+ "eval_pr_auc": 0.12126612292918731,
+ "eval_precision": 0.1574485825458588,
+ "eval_precision_macro": 0.4995923731531417,
+ "eval_pred_class_0": 5276,
+ "eval_pred_class_1": 14392,
+ "eval_predicted_binding_ratio": 0.731747000203376,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.4993975193845588,
+ "eval_runtime": 0.2793,
+ "eval_samples_per_second": 583.516,
+ "eval_steps_per_second": 3.58,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 36
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.34223103518405534,
+ "eval_auc": 0.3911369382652214,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2591765446944969,
+ "eval_f1_macro": 0.33385837704253485,
+ "eval_loss": 1.058252215385437,
+ "eval_pr_auc": 0.12133107613942488,
+ "eval_precision": 0.15756858376270713,
+ "eval_precision_macro": 0.4998170849458089,
+ "eval_pred_class_0": 5306,
+ "eval_pred_class_1": 14362,
+ "eval_predicted_binding_ratio": 0.7302216798861094,
+ "eval_recall": 0.7297645920670751,
+ "eval_recall_macro": 0.49972867739407356,
+ "eval_runtime": 0.2676,
+ "eval_samples_per_second": 609.165,
+ "eval_steps_per_second": 3.737,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 54
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.3441122635753508,
+ "eval_auc": 0.3915867840995182,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.259556882103088,
+ "eval_f1_macro": 0.33544605079873757,
+ "eval_loss": 1.0551481246948242,
+ "eval_pr_auc": 0.12142208631760734,
+ "eval_precision": 0.15788003631031353,
+ "eval_precision_macro": 0.500391299247358,
+ "eval_pred_class_0": 5347,
+ "eval_pred_class_1": 14321,
+ "eval_predicted_binding_ratio": 0.7281370754525117,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.5005832394650029,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.796,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 72
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.3457392719137686,
+ "eval_auc": 0.39218283153314865,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2597791072250345,
+ "eval_f1_macro": 0.3367955302889021,
+ "eval_loss": 1.0511513948440552,
+ "eval_pr_auc": 0.12154600341235242,
+ "eval_precision": 0.15809003710705033,
+ "eval_precision_macro": 0.5007720380521324,
+ "eval_pred_class_0": 5385,
+ "eval_pred_class_1": 14283,
+ "eval_predicted_binding_ratio": 0.7262050030506406,
+ "eval_recall": 0.72815220896485,
+ "eval_recall_macro": 0.5011558413086458,
+ "eval_runtime": 0.2629,
+ "eval_samples_per_second": 620.078,
+ "eval_steps_per_second": 3.804,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 90
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.34873906853772624,
+ "eval_auc": 0.39291782012189097,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.259894840238054,
+ "eval_f1_macro": 0.33921701928376435,
+ "eval_loss": 1.0462485551834106,
+ "eval_pr_auc": 0.1216940029412557,
+ "eval_precision": 0.1583133887089962,
+ "eval_precision_macro": 0.5011632853468087,
+ "eval_pred_class_0": 5462,
+ "eval_pred_class_1": 14206,
+ "eval_predicted_binding_ratio": 0.722290014236323,
+ "eval_recall": 0.7252499193808449,
+ "eval_recall_macro": 0.5017569691067321,
+ "eval_runtime": 0.2393,
+ "eval_samples_per_second": 681.219,
+ "eval_steps_per_second": 4.179,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 108
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.3517897091722595,
+ "eval_auc": 0.3937714770704174,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2599407906193766,
+ "eval_f1_macro": 0.3416488972772128,
+ "eval_loss": 1.0405118465423584,
+ "eval_pr_auc": 0.12187498322705145,
+ "eval_precision": 0.1585020529520034,
+ "eval_precision_macro": 0.5014812682659693,
+ "eval_pred_class_0": 5542,
+ "eval_pred_class_1": 14126,
+ "eval_predicted_binding_ratio": 0.7182224933902787,
+ "eval_recall": 0.7220251531763947,
+ "eval_recall_macro": 0.5022572195531276,
+ "eval_runtime": 0.2765,
+ "eval_samples_per_second": 589.571,
+ "eval_steps_per_second": 3.617,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 126
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.354586129753915,
+ "eval_auc": 0.3947741191129793,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2591338858410179,
+ "eval_f1_macro": 0.3436917965373002,
+ "eval_loss": 1.0338975191116333,
+ "eval_pr_auc": 0.12208733120990471,
+ "eval_precision": 0.1581985320316397,
+ "eval_precision_macro": 0.5009271275952343,
+ "eval_pred_class_0": 5635,
+ "eval_pred_class_1": 14033,
+ "eval_predicted_binding_ratio": 0.713494000406752,
+ "eval_recall": 0.7158980973879394,
+ "eval_recall_macro": 0.5014270471245847,
+ "eval_runtime": 0.2385,
+ "eval_samples_per_second": 683.567,
+ "eval_steps_per_second": 4.194,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 144
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.3598230628431971,
+ "eval_auc": 0.39592411118975185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25913503971756396,
+ "eval_f1_macro": 0.3477761944928628,
+ "eval_loss": 1.0263975858688354,
+ "eval_pr_auc": 0.12233214426039367,
+ "eval_precision": 0.15848567727076435,
+ "eval_precision_macro": 0.5013938604573427,
+ "eval_pred_class_0": 5774,
+ "eval_pred_class_1": 13894,
+ "eval_predicted_binding_ratio": 0.70642668293675,
+ "eval_recall": 0.710093518219929,
+ "eval_recall_macro": 0.502176595531767,
+ "eval_runtime": 0.2745,
+ "eval_samples_per_second": 593.873,
+ "eval_steps_per_second": 3.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 162
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.3636872076469392,
+ "eval_auc": 0.3972021050928084,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25801861623288075,
+ "eval_f1_macro": 0.35051449303788773,
+ "eval_loss": 1.0180495977401733,
+ "eval_pr_auc": 0.12260540333611444,
+ "eval_precision": 0.15807060874618625,
+ "eval_precision_macro": 0.5006720376838353,
+ "eval_pred_class_0": 5902,
+ "eval_pred_class_1": 13766,
+ "eval_predicted_binding_ratio": 0.6999186495830791,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.5010628083511147,
+ "eval_runtime": 0.2708,
+ "eval_samples_per_second": 601.838,
+ "eval_steps_per_second": 3.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 180
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.3691275167785235,
+ "eval_auc": 0.39866621357342186,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.258604206500956,
+ "eval_f1_macro": 0.35478882891419483,
+ "eval_loss": 1.0087939500808716,
+ "eval_pr_auc": 0.12292033936081492,
+ "eval_precision": 0.1587092042537587,
+ "eval_precision_macro": 0.5016983780261003,
+ "eval_pred_class_0": 6033,
+ "eval_pred_class_1": 13635,
+ "eval_predicted_binding_ratio": 0.6932580841976815,
+ "eval_recall": 0.6978394066430184,
+ "eval_recall_macro": 0.5027194256611,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.126,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 198
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.3740593858043523,
+ "eval_auc": 0.4002664991794433,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25823944086280654,
+ "eval_f1_macro": 0.35841740282728696,
+ "eval_loss": 0.9987770318984985,
+ "eval_pr_auc": 0.12326823892822446,
+ "eval_precision": 0.15878778897451096,
+ "eval_precision_macro": 0.5017853397238077,
+ "eval_pred_class_0": 6172,
+ "eval_pred_class_1": 13496,
+ "eval_predicted_binding_ratio": 0.6861907667276794,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.5028947176998165,
+ "eval_runtime": 0.258,
+ "eval_samples_per_second": 631.708,
+ "eval_steps_per_second": 3.876,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 216
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.37980475899938987,
+ "eval_auc": 0.40207323055334293,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25811945018854154,
+ "eval_f1_macro": 0.36265804779890953,
+ "eval_loss": 0.987876832485199,
+ "eval_pr_auc": 0.12366119818610516,
+ "eval_precision": 0.15905854133873024,
+ "eval_precision_macro": 0.5021624301446299,
+ "eval_pred_class_0": 6327,
+ "eval_pred_class_1": 13341,
+ "eval_predicted_binding_ratio": 0.6783099450884685,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.5035528974067893,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.68,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 234
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.3867703884482408,
+ "eval_auc": 0.40404318566725245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2582866982350409,
+ "eval_f1_macro": 0.36779990383041317,
+ "eval_loss": 0.9760332107543945,
+ "eval_pr_auc": 0.12409453800524387,
+ "eval_precision": 0.1595744680851064,
+ "eval_precision_macro": 0.5028818867776484,
+ "eval_pred_class_0": 6508,
+ "eval_pred_class_1": 13160,
+ "eval_predicted_binding_ratio": 0.6691071791742933,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.5048043507851898,
+ "eval_runtime": 0.2717,
+ "eval_samples_per_second": 599.919,
+ "eval_steps_per_second": 3.68,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 252
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.39429530201342283,
+ "eval_auc": 0.40629005957398867,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2577107607950651,
+ "eval_f1_macro": 0.37306889008104693,
+ "eval_loss": 0.9632152915000916,
+ "eval_pr_auc": 0.12458802431940903,
+ "eval_precision": 0.15971578622181032,
+ "eval_precision_macro": 0.5029977740632862,
+ "eval_pred_class_0": 6720,
+ "eval_pred_class_1": 12948,
+ "eval_predicted_binding_ratio": 0.6583282489322758,
+ "eval_recall": 0.6668816510802967,
+ "eval_recall_macro": 0.5050772111259515,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.248,
+ "eval_steps_per_second": 3.952,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 270
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.4020744356314826,
+ "eval_auc": 0.408681470822737,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25738822935084615,
+ "eval_f1_macro": 0.37848134871797623,
+ "eval_loss": 0.9496278166770935,
+ "eval_pr_auc": 0.12511500176534787,
+ "eval_precision": 0.16003140950137418,
+ "eval_precision_macro": 0.5033533652151325,
+ "eval_pred_class_0": 6933,
+ "eval_pred_class_1": 12735,
+ "eval_predicted_binding_ratio": 0.6474984746796827,
+ "eval_recall": 0.6572073524669462,
+ "eval_recall_macro": 0.5057630895249562,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 606.014,
+ "eval_steps_per_second": 3.718,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 288
+ },
+ {
+ "epoch": 17.0,
+ "eval_accuracy": 0.408989221069758,
+ "eval_auc": 0.4113766625614338,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.255349135169763,
+ "eval_f1_macro": 0.38271123621844805,
+ "eval_loss": 0.9350630640983582,
+ "eval_pr_auc": 0.1257149536327416,
+ "eval_precision": 0.15932528579422817,
+ "eval_precision_macro": 0.5022775332449281,
+ "eval_pred_class_0": 7159,
+ "eval_pred_class_1": 12509,
+ "eval_predicted_binding_ratio": 0.6360077282896075,
+ "eval_recall": 0.6426959045469204,
+ "eval_recall_macro": 0.5039700323120913,
+ "eval_runtime": 0.2687,
+ "eval_samples_per_second": 606.61,
+ "eval_steps_per_second": 3.722,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 306
+ },
+ {
+ "epoch": 18.0,
+ "eval_accuracy": 0.4161582265609111,
+ "eval_auc": 0.41440477389195646,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2538176619663396,
+ "eval_f1_macro": 0.3871501973338609,
+ "eval_loss": 0.9196970462799072,
+ "eval_pr_auc": 0.12640556118775828,
+ "eval_precision": 0.158935546875,
+ "eval_precision_macro": 0.5016899956597223,
+ "eval_pred_class_0": 7380,
+ "eval_pred_class_1": 12288,
+ "eval_predicted_binding_ratio": 0.62477120195241,
+ "eval_recall": 0.6297968397291196,
+ "eval_recall_macro": 0.5029831666503388,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.105,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 324
+ },
+ {
+ "epoch": 19.0,
+ "eval_accuracy": 0.4237848281472443,
+ "eval_auc": 0.4176888499450513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25100786464873437,
+ "eval_f1_macro": 0.3913994084656603,
+ "eval_loss": 0.9033117294311523,
+ "eval_pr_auc": 0.12714405405007598,
+ "eval_precision": 0.15785536159600996,
+ "eval_precision_macro": 0.5002421610284318,
+ "eval_pred_class_0": 7638,
+ "eval_pred_class_1": 12030,
+ "eval_predicted_binding_ratio": 0.611653447223917,
+ "eval_recall": 0.6123831022250886,
+ "eval_recall_macro": 0.5004331156685895,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.078,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 342
+ },
+ {
+ "epoch": 20.0,
+ "eval_accuracy": 0.4312080536912752,
+ "eval_auc": 0.4212995000006521,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24904343156340203,
+ "eval_f1_macro": 0.39564573885956833,
+ "eval_loss": 0.8857852220535278,
+ "eval_pr_auc": 0.12799421494868934,
+ "eval_precision": 0.1572566971854866,
+ "eval_precision_macro": 0.49948708843014167,
+ "eval_pred_class_0": 7872,
+ "eval_pred_class_1": 11796,
+ "eval_predicted_binding_ratio": 0.5997559487492373,
+ "eval_recall": 0.5981941309255079,
+ "eval_recall_macro": 0.4990729210793411,
+ "eval_runtime": 0.2709,
+ "eval_samples_per_second": 601.789,
+ "eval_steps_per_second": 3.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 360
+ },
+ {
+ "epoch": 21.0,
+ "eval_accuracy": 0.4394956274150905,
+ "eval_auc": 0.42538333442304876,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24493150684931506,
+ "eval_f1_macro": 0.399632635701501,
+ "eval_loss": 0.8671084642410278,
+ "eval_pr_auc": 0.12894871744717554,
+ "eval_precision": 0.15549178189407775,
+ "eval_precision_macro": 0.4973810972146359,
+ "eval_pred_class_0": 8169,
+ "eval_pred_class_1": 11499,
+ "eval_predicted_binding_ratio": 0.5846552776082977,
+ "eval_recall": 0.5765881973556917,
+ "eval_recall_macro": 0.4952114645256155,
+ "eval_runtime": 0.2681,
+ "eval_samples_per_second": 607.95,
+ "eval_steps_per_second": 3.73,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 378
+ },
+ {
+ "epoch": 22.0,
+ "eval_accuracy": 0.44824079723408583,
+ "eval_auc": 0.42976391273864795,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24080033580523297,
+ "eval_f1_macro": 0.4037241835563183,
+ "eval_loss": 0.8476783633232117,
+ "eval_pr_auc": 0.13001972671009082,
+ "eval_precision": 0.15375681229339766,
+ "eval_precision_macro": 0.495462476943159,
+ "eval_pred_class_0": 8475,
+ "eval_pred_class_1": 11193,
+ "eval_predicted_binding_ratio": 0.5690970103721782,
+ "eval_recall": 0.5549822637858756,
+ "eval_recall_macro": 0.491621632285284,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.096,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 396
+ },
+ {
+ "epoch": 23.0,
+ "eval_accuracy": 0.46044335977221884,
+ "eval_auc": 0.4345819960798662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23819095477386934,
+ "eval_f1_macro": 0.4102471738365922,
+ "eval_loss": 0.8271914720535278,
+ "eval_pr_auc": 0.1312038077210987,
+ "eval_precision": 0.15319974143503556,
+ "eval_precision_macro": 0.49502955733365084,
+ "eval_pred_class_0": 8839,
+ "eval_pred_class_1": 10829,
+ "eval_predicted_binding_ratio": 0.5505897905226764,
+ "eval_recall": 0.5349887133182845,
+ "eval_recall_macro": 0.4907393617898237,
+ "eval_runtime": 0.2699,
+ "eval_samples_per_second": 603.817,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 414
+ },
+ {
+ "epoch": 24.0,
+ "eval_accuracy": 0.4719849501728696,
+ "eval_auc": 0.4399397854182523,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2337489854644728,
+ "eval_f1_macro": 0.4154821023975197,
+ "eval_loss": 0.8054794669151306,
+ "eval_pr_auc": 0.13253606290408437,
+ "eval_precision": 0.1515499425947187,
+ "eval_precision_macro": 0.4934724539362483,
+ "eval_pred_class_0": 9216,
+ "eval_pred_class_1": 10452,
+ "eval_predicted_binding_ratio": 0.5314215985356925,
+ "eval_recall": 0.5108029667849081,
+ "eval_recall_macro": 0.48776099326147077,
+ "eval_runtime": 0.2459,
+ "eval_samples_per_second": 662.968,
+ "eval_steps_per_second": 4.067,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 432
+ },
+ {
+ "epoch": 25.0,
+ "eval_accuracy": 0.48713646532438476,
+ "eval_auc": 0.4457222328836341,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23099794160250056,
+ "eval_f1_macro": 0.42313846887516615,
+ "eval_loss": 0.7826969027519226,
+ "eval_pr_auc": 0.13403779679155806,
+ "eval_precision": 0.15125798722044728,
+ "eval_precision_macro": 0.4934698556077371,
+ "eval_pred_class_0": 9652,
+ "eval_pred_class_1": 10016,
+ "eval_predicted_binding_ratio": 0.5092536099247509,
+ "eval_recall": 0.48855207997420186,
+ "eval_recall_macro": 0.48771178574674356,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.772,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 450
+ },
+ {
+ "epoch": 26.0,
+ "eval_accuracy": 0.506152125279642,
+ "eval_auc": 0.452125351005008,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22648721828462212,
+ "eval_f1_macro": 0.43188881620754876,
+ "eval_loss": 0.7587484121322632,
+ "eval_pr_auc": 0.13570124162691763,
+ "eval_precision": 0.15038071065989847,
+ "eval_precision_macro": 0.492983148122742,
+ "eval_pred_class_0": 10212,
+ "eval_pred_class_1": 9456,
+ "eval_predicted_binding_ratio": 0.4807809640024405,
+ "eval_recall": 0.4585617542728152,
+ "eval_recall_macro": 0.48681090671327726,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.566,
+ "eval_steps_per_second": 3.752,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 468
+ },
+ {
+ "epoch": 27.0,
+ "eval_accuracy": 0.529997966239577,
+ "eval_auc": 0.4588746151842906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22371514947934162,
+ "eval_f1_macro": 0.4433436473661838,
+ "eval_loss": 0.7342172861099243,
+ "eval_pr_auc": 0.13749280910612846,
+ "eval_precision": 0.15124332916997843,
+ "eval_precision_macro": 0.4941834913044441,
+ "eval_pred_class_0": 10861,
+ "eval_pred_class_1": 8807,
+ "eval_predicted_binding_ratio": 0.44778320113890585,
+ "eval_recall": 0.4295388584327636,
+ "eval_recall_macro": 0.4891703467029515,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.173,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 486
+ },
+ {
+ "epoch": 27.77777777777778,
+ "grad_norm": 191838.453125,
+ "learning_rate": 5.544444444444443e-07,
+ "loss": 0.954,
+ "step": 500
+ },
+ {
+ "epoch": 28.0,
+ "eval_accuracy": 0.5579621720561317,
+ "eval_auc": 0.46628288633295734,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22208303507516106,
+ "eval_f1_macro": 0.4566736198103078,
+ "eval_loss": 0.7085328102111816,
+ "eval_pr_auc": 0.1397221721421834,
+ "eval_precision": 0.15368421052631578,
+ "eval_precision_macro": 0.4966212823527809,
+ "eval_pred_class_0": 11593,
+ "eval_pred_class_1": 8075,
+ "eval_predicted_binding_ratio": 0.4105653853976002,
+ "eval_recall": 0.400193485972267,
+ "eval_recall_macro": 0.49384334768221605,
+ "eval_runtime": 0.245,
+ "eval_samples_per_second": 665.264,
+ "eval_steps_per_second": 4.081,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 504
+ },
+ {
+ "epoch": 29.0,
+ "eval_accuracy": 0.5890278625177954,
+ "eval_auc": 0.47432292318642716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21926011784023955,
+ "eval_f1_macro": 0.4701862470303913,
+ "eval_loss": 0.6820237636566162,
+ "eval_pr_auc": 0.1419831923592407,
+ "eval_precision": 0.15650854936569222,
+ "eval_precision_macro": 0.4990822386003719,
+ "eval_pred_class_0": 12416,
+ "eval_pred_class_1": 7252,
+ "eval_predicted_binding_ratio": 0.36872076469391907,
+ "eval_recall": 0.36601096420509516,
+ "eval_recall_macro": 0.49839149043235986,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 681.884,
+ "eval_steps_per_second": 4.183,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 522
+ },
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.62385600976205,
+ "eval_auc": 0.4829859859606367,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21797040169133192,
+ "eval_f1_macro": 0.4851734455906117,
+ "eval_loss": 0.6552286148071289,
+ "eval_pr_auc": 0.144533301478986,
+ "eval_precision": 0.16213241075640825,
+ "eval_precision_macro": 0.5032992807407407,
+ "eval_pred_class_0": 13309,
+ "eval_pred_class_1": 6359,
+ "eval_predicted_binding_ratio": 0.32331706324994913,
+ "eval_recall": 0.3324733956788133,
+ "eval_recall_macro": 0.5054351043101014,
+ "eval_runtime": 0.2289,
+ "eval_samples_per_second": 712.13,
+ "eval_steps_per_second": 4.369,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 540
+ },
+ {
+ "epoch": 31.0,
+ "eval_accuracy": 0.6593959731543624,
+ "eval_auc": 0.4923438323703967,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21308586867144366,
+ "eval_f1_macro": 0.49787408315316334,
+ "eval_loss": 0.6283431053161621,
+ "eval_pr_auc": 0.14738118302130468,
+ "eval_precision": 0.16759053954175906,
+ "eval_precision_macro": 0.5068452136541568,
+ "eval_pred_class_0": 14256,
+ "eval_pred_class_1": 5412,
+ "eval_predicted_binding_ratio": 0.2751677852348993,
+ "eval_recall": 0.2924862947436311,
+ "eval_recall_macro": 0.5102800882784371,
+ "eval_runtime": 0.2716,
+ "eval_samples_per_second": 600.185,
+ "eval_steps_per_second": 3.682,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 558
+ },
+ {
+ "epoch": 32.0,
+ "eval_accuracy": 0.6967154769168192,
+ "eval_auc": 0.5026651961769109,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21316449017280042,
+ "eval_f1_macro": 0.5126600281126953,
+ "eval_loss": 0.6013967990875244,
+ "eval_pr_auc": 0.15068393162958252,
+ "eval_precision": 0.18035714285714285,
+ "eval_precision_macro": 0.5146913446706046,
+ "eval_pred_class_0": 15188,
+ "eval_pred_class_1": 4480,
+ "eval_predicted_binding_ratio": 0.2277811673784828,
+ "eval_recall": 0.2605611093195743,
+ "eval_recall_macro": 0.5194578347949956,
+ "eval_runtime": 0.2705,
+ "eval_samples_per_second": 602.56,
+ "eval_steps_per_second": 3.697,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 576
+ },
+ {
+ "epoch": 33.0,
+ "eval_accuracy": 0.7287472035794184,
+ "eval_auc": 0.5136348320064595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20692730786383232,
+ "eval_f1_macro": 0.5216610840892347,
+ "eval_loss": 0.575495719909668,
+ "eval_pr_auc": 0.15441446935423722,
+ "eval_precision": 0.19194704908990623,
+ "eval_precision_macro": 0.5210140431835268,
+ "eval_pred_class_0": 16042,
+ "eval_pred_class_1": 3626,
+ "eval_predicted_binding_ratio": 0.18436038234695953,
+ "eval_recall": 0.22444372782973235,
+ "eval_recall_macro": 0.5237930596654548,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.887,
+ "eval_steps_per_second": 3.778,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 594
+ },
+ {
+ "epoch": 34.0,
+ "eval_accuracy": 0.7577791336180598,
+ "eval_auc": 0.5256758602512032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20013431833445267,
+ "eval_f1_macro": 0.5287070633014385,
+ "eval_loss": 0.5502753853797913,
+ "eval_pr_auc": 0.15881070257620672,
+ "eval_precision": 0.20875656742556917,
+ "eval_precision_macro": 0.5298823579410603,
+ "eval_pred_class_0": 16813,
+ "eval_pred_class_1": 2855,
+ "eval_predicted_binding_ratio": 0.14515965019320723,
+ "eval_recall": 0.19219606578523057,
+ "eval_recall_macro": 0.5279203302306971,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.974,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 612
+ },
+ {
+ "epoch": 35.0,
+ "eval_accuracy": 0.7844213951596501,
+ "eval_auc": 0.5388391234856937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.19575113808801214,
+ "eval_f1_macro": 0.5356397775926205,
+ "eval_loss": 0.5265588164329529,
+ "eval_pr_auc": 0.16395620275178963,
+ "eval_precision": 0.23767848917549517,
+ "eval_precision_macro": 0.5449694383352471,
+ "eval_pred_class_0": 17497,
+ "eval_pred_class_1": 2171,
+ "eval_predicted_binding_ratio": 0.11038234695952817,
+ "eval_recall": 0.16639793614962914,
+ "eval_recall_macro": 0.5332502748895668,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.866,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 630
+ },
+ {
+ "epoch": 36.0,
+ "eval_accuracy": 0.8094366483628228,
+ "eval_auc": 0.5531888075405533,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.19189305735230702,
+ "eval_f1_macro": 0.5419376520838427,
+ "eval_loss": 0.5044229626655579,
+ "eval_pr_auc": 0.16987983494600534,
+ "eval_precision": 0.28952504879635654,
+ "eval_precision_macro": 0.5715178054086024,
+ "eval_pred_class_0": 18131,
+ "eval_pred_class_1": 1537,
+ "eval_predicted_binding_ratio": 0.07814724425462681,
+ "eval_recall": 0.14350209609803288,
+ "eval_recall_macro": 0.5387939646905326,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.446,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 648
+ },
+ {
+ "epoch": 37.0,
+ "eval_accuracy": 0.827791336180598,
+ "eval_auc": 0.5689342779333475,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18796451690242147,
+ "eval_f1_macro": 0.5458235779450257,
+ "eval_loss": 0.4842270016670227,
+ "eval_pr_auc": 0.177326879876991,
+ "eval_precision": 0.3663551401869159,
+ "eval_precision_macro": 0.6103471582212137,
+ "eval_pred_class_0": 18598,
+ "eval_pred_class_1": 1070,
+ "eval_predicted_binding_ratio": 0.05440309131584299,
+ "eval_recall": 0.12641083521444696,
+ "eval_recall_macro": 0.5427430526648682,
+ "eval_runtime": 0.2392,
+ "eval_samples_per_second": 681.483,
+ "eval_steps_per_second": 4.181,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 666
+ },
+ {
+ "epoch": 38.0,
+ "eval_accuracy": 0.8386719544437665,
+ "eval_auc": 0.5868017348062602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1820056715648363,
+ "eval_f1_macro": 0.5462584975699354,
+ "eval_loss": 0.46564891934394836,
+ "eval_pr_auc": 0.18685168882837525,
+ "eval_precision": 0.4537275064267352,
+ "eval_precision_macro": 0.6541268553838282,
+ "eval_pred_class_0": 18890,
+ "eval_pred_class_1": 778,
+ "eval_predicted_binding_ratio": 0.039556640227781166,
+ "eval_recall": 0.11383424701709126,
+ "eval_recall_macro": 0.5440904198204911,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.584,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 684
+ },
+ {
+ "epoch": 39.0,
+ "eval_accuracy": 0.8441631075859264,
+ "eval_auc": 0.6057814605899876,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17982338774417983,
+ "eval_f1_macro": 0.5468627318225942,
+ "eval_loss": 0.4492926001548767,
+ "eval_pr_auc": 0.19848375437748741,
+ "eval_precision": 0.5283018867924528,
+ "eval_precision_macro": 0.6915101279275421,
+ "eval_pred_class_0": 19032,
+ "eval_pred_class_1": 636,
+ "eval_predicted_binding_ratio": 0.03233679072605247,
+ "eval_recall": 0.10835214446952596,
+ "eval_recall_macro": 0.5451219284549598,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.049,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 702
+ },
+ {
+ "epoch": 40.0,
+ "eval_accuracy": 0.8456375838926175,
+ "eval_auc": 0.6262280880815292,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1758957654723127,
+ "eval_f1_macro": 0.5453696262568565,
+ "eval_loss": 0.43461790680885315,
+ "eval_pr_auc": 0.21275175506055685,
+ "eval_precision": 0.5557461406518011,
+ "eval_precision_macro": 0.7051195990133514,
+ "eval_pred_class_0": 19085,
+ "eval_pred_class_1": 583,
+ "eval_predicted_binding_ratio": 0.0296420581655481,
+ "eval_recall": 0.10448242502418574,
+ "eval_recall_macro": 0.544424468382196,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.873,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 720
+ },
+ {
+ "epoch": 41.0,
+ "eval_accuracy": 0.8483323164531218,
+ "eval_auc": 0.6481986497247736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18519530183010108,
+ "eval_f1_macro": 0.5507896621273841,
+ "eval_loss": 0.42107364535331726,
+ "eval_pr_auc": 0.23051421419341214,
+ "eval_precision": 0.6053571428571428,
+ "eval_precision_macro": 0.730405178085469,
+ "eval_pred_class_0": 19108,
+ "eval_pred_class_1": 560,
+ "eval_predicted_binding_ratio": 0.02847264592231035,
+ "eval_recall": 0.10931957433086101,
+ "eval_recall_macro": 0.5479899012476421,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.385,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 738
+ },
+ {
+ "epoch": 42.0,
+ "eval_accuracy": 0.8502135448444174,
+ "eval_auc": 0.6709813300109956,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20292207792207792,
+ "eval_f1_macro": 0.5601310726310726,
+ "eval_loss": 0.4084097743034363,
+ "eval_pr_auc": 0.25093797354762637,
+ "eval_precision": 0.6302521008403361,
+ "eval_precision_macro": 0.7436637739036264,
+ "eval_pred_class_0": 19073,
+ "eval_pred_class_1": 595,
+ "eval_predicted_binding_ratio": 0.03025218629245475,
+ "eval_recall": 0.12092873266688164,
+ "eval_recall_macro": 0.5538246608949184,
+ "eval_runtime": 0.2691,
+ "eval_samples_per_second": 605.746,
+ "eval_steps_per_second": 3.716,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 756
+ },
+ {
+ "epoch": 43.0,
+ "eval_accuracy": 0.8526540573520439,
+ "eval_auc": 0.6936772353365158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22263948497854077,
+ "eval_f1_macro": 0.5706266398157138,
+ "eval_loss": 0.39666271209716797,
+ "eval_pr_auc": 0.2738840395423864,
+ "eval_precision": 0.6618819776714514,
+ "eval_precision_macro": 0.7604089789622948,
+ "eval_pred_class_0": 19041,
+ "eval_pred_class_1": 627,
+ "eval_predicted_binding_ratio": 0.031879194630872486,
+ "eval_recall": 0.13382779748468235,
+ "eval_recall_macro": 0.5605156371379469,
+ "eval_runtime": 0.2268,
+ "eval_samples_per_second": 718.638,
+ "eval_steps_per_second": 4.409,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 774
+ },
+ {
+ "epoch": 44.0,
+ "eval_accuracy": 0.8544844417327638,
+ "eval_auc": 0.7158095511124275,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24325753569539926,
+ "eval_f1_macro": 0.58138013196988,
+ "eval_loss": 0.38576817512512207,
+ "eval_pr_auc": 0.29905525248581355,
+ "eval_precision": 0.6754772393538914,
+ "eval_precision_macro": 0.7681910344870789,
+ "eval_pred_class_0": 18987,
+ "eval_pred_class_1": 681,
+ "eval_predicted_binding_ratio": 0.03462477120195241,
+ "eval_recall": 0.14833924540470816,
+ "eval_recall_macro": 0.5674997367845657,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.061,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 792
+ },
+ {
+ "epoch": 45.0,
+ "eval_accuracy": 0.8575859263778727,
+ "eval_auc": 0.737084336405228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2808729139922978,
+ "eval_f1_macro": 0.60092007766148,
+ "eval_loss": 0.37560486793518066,
+ "eval_pr_auc": 0.3260256629572295,
+ "eval_precision": 0.6889168765743073,
+ "eval_precision_macro": 0.7767992245539758,
+ "eval_pred_class_0": 18874,
+ "eval_pred_class_1": 794,
+ "eval_predicted_binding_ratio": 0.040370144396990035,
+ "eval_recall": 0.1763947113834247,
+ "eval_recall_macro": 0.5807427773130077,
+ "eval_runtime": 0.2685,
+ "eval_samples_per_second": 607.1,
+ "eval_steps_per_second": 3.725,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 810
+ },
+ {
+ "epoch": 46.0,
+ "eval_accuracy": 0.8611450071181614,
+ "eval_auc": 0.7571642141385685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.32451150136037593,
+ "eval_f1_macro": 0.623565358817779,
+ "eval_loss": 0.36611661314964294,
+ "eval_pr_auc": 0.3532860869347882,
+ "eval_precision": 0.6963906581740976,
+ "eval_precision_macro": 0.7829117661264593,
+ "eval_pred_class_0": 18726,
+ "eval_pred_class_1": 942,
+ "eval_predicted_binding_ratio": 0.047895057962172055,
+ "eval_recall": 0.21154466301193164,
+ "eval_recall_macro": 0.5971407144358867,
+ "eval_runtime": 0.2698,
+ "eval_samples_per_second": 604.115,
+ "eval_steps_per_second": 3.706,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 828
+ },
+ {
+ "epoch": 47.0,
+ "eval_accuracy": 0.8646532438478747,
+ "eval_auc": 0.7759390708192488,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3694931312174325,
+ "eval_f1_macro": 0.6468414565354121,
+ "eval_loss": 0.3574466407299042,
+ "eval_pr_auc": 0.380672409235741,
+ "eval_precision": 0.695807314897413,
+ "eval_precision_macro": 0.7853328912870632,
+ "eval_pred_class_0": 18547,
+ "eval_pred_class_1": 1121,
+ "eval_predicted_binding_ratio": 0.05699613585519626,
+ "eval_recall": 0.25153176394711385,
+ "eval_recall_macro": 0.6154743385438473,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.868,
+ "eval_steps_per_second": 3.999,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 846
+ },
+ {
+ "epoch": 48.0,
+ "eval_accuracy": 0.8670937563555013,
+ "eval_auc": 0.7932697219796829,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4004587155963303,
+ "eval_f1_macro": 0.6628608765538834,
+ "eval_loss": 0.34963178634643555,
+ "eval_pr_auc": 0.40879055918048346,
+ "eval_precision": 0.69340746624305,
+ "eval_precision_macro": 0.7861898540406407,
+ "eval_pred_class_0": 18409,
+ "eval_pred_class_1": 1259,
+ "eval_predicted_binding_ratio": 0.06401260931462274,
+ "eval_recall": 0.2815220896485005,
+ "eval_recall_macro": 0.6291113798275701,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.281,
+ "eval_steps_per_second": 3.971,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 864
+ },
+ {
+ "epoch": 49.0,
+ "eval_accuracy": 0.8693308928208257,
+ "eval_auc": 0.8090460638591691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.43491644678979774,
+ "eval_f1_macro": 0.6805201987887128,
+ "eval_loss": 0.3424255847930908,
+ "eval_pr_auc": 0.43548439720530613,
+ "eval_precision": 0.6834830684174154,
+ "eval_precision_macro": 0.783786427463743,
+ "eval_pred_class_0": 18221,
+ "eval_pred_class_1": 1447,
+ "eval_predicted_binding_ratio": 0.07357128330282693,
+ "eval_recall": 0.3189293776201225,
+ "eval_recall_macro": 0.6456420293062284,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.097,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 882
+ },
+ {
+ "epoch": 50.0,
+ "eval_accuracy": 0.8711612772015457,
+ "eval_auc": 0.8231584209269593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.46153846153846156,
+ "eval_f1_macro": 0.6941824562962304,
+ "eval_loss": 0.3358187675476074,
+ "eval_pr_auc": 0.46013674866792464,
+ "eval_precision": 0.6766355140186916,
+ "eval_precision_macro": 0.7825407542966181,
+ "eval_pred_class_0": 18063,
+ "eval_pred_class_1": 1605,
+ "eval_predicted_binding_ratio": 0.0816046369737645,
+ "eval_recall": 0.35020960980328925,
+ "eval_recall_macro": 0.659441136162585,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.797,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 900
+ },
+ {
+ "epoch": 51.0,
+ "eval_accuracy": 0.8745169818995322,
+ "eval_auc": 0.8357514570475526,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.49363972096840375,
+ "eval_f1_macro": 0.7110123043354003,
+ "eval_loss": 0.32985639572143555,
+ "eval_pr_auc": 0.48277553791567623,
+ "eval_precision": 0.6785109983079526,
+ "eval_precision_macro": 0.7862239260888744,
+ "eval_pred_class_0": 17895,
+ "eval_pred_class_1": 1773,
+ "eval_predicted_binding_ratio": 0.0901464307504576,
+ "eval_recall": 0.38793937439535636,
+ "eval_recall_macro": 0.6767668140160521,
+ "eval_runtime": 0.2496,
+ "eval_samples_per_second": 653.0,
+ "eval_steps_per_second": 4.006,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 918
+ },
+ {
+ "epoch": 52.0,
+ "eval_accuracy": 0.8772117144600367,
+ "eval_auc": 0.8463606400457255,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.523574669560071,
+ "eval_f1_macro": 0.7265493507137326,
+ "eval_loss": 0.3246362507343292,
+ "eval_pr_auc": 0.5014434788718165,
+ "eval_precision": 0.6742886178861789,
+ "eval_precision_macro": 0.787031314592807,
+ "eval_pred_class_0": 17700,
+ "eval_pred_class_1": 1968,
+ "eval_predicted_binding_ratio": 0.10006101281269067,
+ "eval_recall": 0.4279264753305385,
+ "eval_recall_macro": 0.6946175504557563,
+ "eval_runtime": 0.2301,
+ "eval_samples_per_second": 708.248,
+ "eval_steps_per_second": 4.345,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 936
+ },
+ {
+ "epoch": 53.0,
+ "eval_accuracy": 0.878991254830181,
+ "eval_auc": 0.8556500280578212,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5445847684653655,
+ "eval_f1_macro": 0.7374052543587455,
+ "eval_loss": 0.3201504647731781,
+ "eval_pr_auc": 0.5184804467620471,
+ "eval_precision": 0.6696470588235294,
+ "eval_precision_macro": 0.786998185969936,
+ "eval_pred_class_0": 17543,
+ "eval_pred_class_1": 2125,
+ "eval_predicted_binding_ratio": 0.10804352247305267,
+ "eval_recall": 0.45888423089326025,
+ "eval_recall_macro": 0.7082554190018906,
+ "eval_runtime": 0.2666,
+ "eval_samples_per_second": 611.344,
+ "eval_steps_per_second": 3.751,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 954
+ },
+ {
+ "epoch": 54.0,
+ "eval_accuracy": 0.8792454748830588,
+ "eval_auc": 0.8636336358823378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5594509367464292,
+ "eval_f1_macro": 0.744742407539513,
+ "eval_loss": 0.31616976857185364,
+ "eval_pr_auc": 0.5331175601979875,
+ "eval_precision": 0.6585152838427948,
+ "eval_precision_macro": 0.7834238290545543,
+ "eval_pred_class_0": 17378,
+ "eval_pred_class_1": 2290,
+ "eval_predicted_binding_ratio": 0.11643278421801911,
+ "eval_recall": 0.48629474363108677,
+ "eval_recall_macro": 0.719546237029523,
+ "eval_runtime": 0.2694,
+ "eval_samples_per_second": 604.964,
+ "eval_steps_per_second": 3.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 972
+ },
+ {
+ "epoch": 55.0,
+ "eval_accuracy": 0.8814317673378076,
+ "eval_auc": 0.8703512791725087,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.578757225433526,
+ "eval_f1_macro": 0.7548815712966447,
+ "eval_loss": 0.31280621886253357,
+ "eval_pr_auc": 0.5453871030590061,
+ "eval_precision": 0.657905544147844,
+ "eval_precision_macro": 0.7854606348952531,
+ "eval_pred_class_0": 17233,
+ "eval_pred_class_1": 2435,
+ "eval_predicted_binding_ratio": 0.12380516575147447,
+ "eval_recall": 0.5166075459529185,
+ "eval_recall_macro": 0.7331634337478723,
+ "eval_runtime": 0.3648,
+ "eval_samples_per_second": 446.874,
+ "eval_steps_per_second": 2.742,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 990
+ },
+ {
+ "epoch": 55.55555555555556,
+ "grad_norm": 18517.669921875,
+ "learning_rate": 9.996314582053105e-07,
+ "loss": 0.4604,
+ "step": 1000
+ },
+ {
+ "epoch": 56.0,
+ "eval_accuracy": 0.8834146837502542,
+ "eval_auc": 0.8759527216222862,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5956621407159232,
+ "eval_f1_macro": 0.7637749289648232,
+ "eval_loss": 0.3100614845752716,
+ "eval_pr_auc": 0.5551596710183998,
+ "eval_precision": 0.6571984435797665,
+ "eval_precision_macro": 0.7873078426812156,
+ "eval_pred_class_0": 17098,
+ "eval_pred_class_1": 2570,
+ "eval_predicted_binding_ratio": 0.1306691071791743,
+ "eval_recall": 0.5446630119316349,
+ "eval_recall_macro": 0.745742503732462,
+ "eval_runtime": 0.2507,
+ "eval_samples_per_second": 650.302,
+ "eval_steps_per_second": 3.99,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1008
+ },
+ {
+ "epoch": 57.0,
+ "eval_accuracy": 0.8840756558877364,
+ "eval_auc": 0.8809651824326759,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6064894718674491,
+ "eval_f1_macro": 0.7692574960553631,
+ "eval_loss": 0.30748215317726135,
+ "eval_pr_auc": 0.5634298069700459,
+ "eval_precision": 0.6524322317118455,
+ "eval_precision_macro": 0.7866284869899434,
+ "eval_pred_class_0": 16975,
+ "eval_pred_class_1": 2693,
+ "eval_predicted_binding_ratio": 0.13692292047996746,
+ "eval_recall": 0.5665914221218962,
+ "eval_recall_macro": 0.7550467824679621,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.163,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1026
+ },
+ {
+ "epoch": 58.0,
+ "eval_accuracy": 0.8849908480780964,
+ "eval_auc": 0.8853361353068065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6121399176954733,
+ "eval_f1_macro": 0.7723127955239544,
+ "eval_loss": 0.30511021614074707,
+ "eval_pr_auc": 0.5712324508006517,
+ "eval_precision": 0.6536067374588063,
+ "eval_precision_macro": 0.7879535133831199,
+ "eval_pred_class_0": 16937,
+ "eval_pred_class_1": 2731,
+ "eval_predicted_binding_ratio": 0.13885499288183853,
+ "eval_recall": 0.5756207674943566,
+ "eval_recall_macro": 0.7592596503615321,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.515,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1044
+ },
+ {
+ "epoch": 59.0,
+ "eval_accuracy": 0.8853467561521253,
+ "eval_auc": 0.8892084338643703,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.616822429906542,
+ "eval_f1_macro": 0.7747051971959543,
+ "eval_loss": 0.3030014634132385,
+ "eval_pr_auc": 0.5778370115776272,
+ "eval_precision": 0.6519396551724138,
+ "eval_precision_macro": 0.7878864350252024,
+ "eval_pred_class_0": 16884,
+ "eval_pred_class_1": 2784,
+ "eval_predicted_binding_ratio": 0.1415497254423429,
+ "eval_recall": 0.5852950661077072,
+ "eval_recall_macro": 0.7634026486450891,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.008,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1062
+ },
+ {
+ "epoch": 60.0,
+ "eval_accuracy": 0.884685784014643,
+ "eval_auc": 0.8924710108272688,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6202277294038848,
+ "eval_f1_macro": 0.7761251343338811,
+ "eval_loss": 0.3011925220489502,
+ "eval_pr_auc": 0.5832812236308141,
+ "eval_precision": 0.6450714036920934,
+ "eval_precision_macro": 0.7853564436451774,
+ "eval_pred_class_0": 16797,
+ "eval_pred_class_1": 2871,
+ "eval_predicted_binding_ratio": 0.14597315436241612,
+ "eval_recall": 0.5972267010641729,
+ "eval_recall_macro": 0.7678594421600216,
+ "eval_runtime": 0.2437,
+ "eval_samples_per_second": 668.98,
+ "eval_steps_per_second": 4.104,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1080
+ },
+ {
+ "epoch": 61.0,
+ "eval_accuracy": 0.8856518202155786,
+ "eval_auc": 0.8954398707041407,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6242272347535506,
+ "eval_f1_macro": 0.7783964874556335,
+ "eval_loss": 0.29942846298217773,
+ "eval_pr_auc": 0.5886815510653964,
+ "eval_precision": 0.6477115117891817,
+ "eval_precision_macro": 0.7871243450270979,
+ "eval_pred_class_0": 16784,
+ "eval_pred_class_1": 2884,
+ "eval_predicted_binding_ratio": 0.14663412649989832,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.7705297965613797,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.921,
+ "eval_steps_per_second": 3.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1098
+ },
+ {
+ "epoch": 62.0,
+ "eval_accuracy": 0.8865161683953631,
+ "eval_auc": 0.8978366542923133,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6288659793814433,
+ "eval_f1_macro": 0.7809416026191173,
+ "eval_loss": 0.29785510897636414,
+ "eval_pr_auc": 0.593021329597711,
+ "eval_precision": 0.6491589426707861,
+ "eval_precision_macro": 0.7884708470441367,
+ "eval_pred_class_0": 16755,
+ "eval_pred_class_1": 2913,
+ "eval_predicted_binding_ratio": 0.14810860280658938,
+ "eval_recall": 0.6098032892615285,
+ "eval_recall_macro": 0.7740571948209012,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.719,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1116
+ },
+ {
+ "epoch": 63.0,
+ "eval_accuracy": 0.8871262965222697,
+ "eval_auc": 0.8998632136201572,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6315964155326916,
+ "eval_f1_macro": 0.7824748814379159,
+ "eval_loss": 0.2964444160461426,
+ "eval_pr_auc": 0.5970041919243015,
+ "eval_precision": 0.6505982905982906,
+ "eval_precision_macro": 0.789523000044412,
+ "eval_pred_class_0": 16743,
+ "eval_pred_class_1": 2925,
+ "eval_predicted_binding_ratio": 0.14871873093349602,
+ "eval_recall": 0.6136730087068688,
+ "eval_recall_macro": 0.7759920545435715,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.792,
+ "eval_steps_per_second": 3.71,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1134
+ },
+ {
+ "epoch": 64.0,
+ "eval_accuracy": 0.8876855806386008,
+ "eval_auc": 0.9017429582012333,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6356589147286822,
+ "eval_f1_macro": 0.7846343742639293,
+ "eval_loss": 0.2951850891113281,
+ "eval_pr_auc": 0.6005268804358049,
+ "eval_precision": 0.650573936529372,
+ "eval_precision_macro": 0.7901498917652248,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.7794694277584535,
+ "eval_runtime": 0.2675,
+ "eval_samples_per_second": 609.376,
+ "eval_steps_per_second": 3.739,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1152
+ },
+ {
+ "epoch": 65.0,
+ "eval_accuracy": 0.8881940207443563,
+ "eval_auc": 0.9033021142666618,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6377861966727063,
+ "eval_f1_macro": 0.785840340182137,
+ "eval_loss": 0.2939698398113251,
+ "eval_pr_auc": 0.6035211605243039,
+ "eval_precision": 0.6518518518518519,
+ "eval_precision_macro": 0.7910415086304414,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6243147371815544,
+ "eval_recall_macro": 0.7809507530297222,
+ "eval_runtime": 0.2684,
+ "eval_samples_per_second": 607.415,
+ "eval_steps_per_second": 3.726,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1170
+ },
+ {
+ "epoch": 66.0,
+ "eval_accuracy": 0.8882448647549319,
+ "eval_auc": 0.9048048023731414,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6389618922470434,
+ "eval_f1_macro": 0.7864263262967652,
+ "eval_loss": 0.29283198714256287,
+ "eval_pr_auc": 0.6066927627742578,
+ "eval_precision": 0.6511550050217609,
+ "eval_precision_macro": 0.7909273016835919,
+ "eval_pred_class_0": 16681,
+ "eval_pred_class_1": 2987,
+ "eval_predicted_binding_ratio": 0.1518710595891804,
+ "eval_recall": 0.6272170267655595,
+ "eval_recall_macro": 0.7821604539875966,
+ "eval_runtime": 0.2764,
+ "eval_samples_per_second": 589.68,
+ "eval_steps_per_second": 3.618,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1188
+ },
+ {
+ "epoch": 67.0,
+ "eval_accuracy": 0.888346552776083,
+ "eval_auc": 0.9061457752769495,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6402359108781127,
+ "eval_f1_macro": 0.7870775124924988,
+ "eval_loss": 0.29169291257858276,
+ "eval_pr_auc": 0.6096183698390041,
+ "eval_precision": 0.6506826506826506,
+ "eval_precision_macro": 0.7909278839971909,
+ "eval_pred_class_0": 16665,
+ "eval_pred_class_1": 3003,
+ "eval_predicted_binding_ratio": 0.15268456375838926,
+ "eval_recall": 0.6301193163495646,
+ "eval_recall_macro": 0.783400335424737,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.516,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1206
+ },
+ {
+ "epoch": 68.0,
+ "eval_accuracy": 0.8887533048606874,
+ "eval_auc": 0.9074730837522218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.641781270464964,
+ "eval_f1_macro": 0.7879665952661885,
+ "eval_loss": 0.2904839515686035,
+ "eval_pr_auc": 0.6127271933864005,
+ "eval_precision": 0.6518124376454939,
+ "eval_precision_macro": 0.7916645766644131,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6320541760722348,
+ "eval_recall_macro": 0.7844281262446042,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.958,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1224
+ },
+ {
+ "epoch": 69.0,
+ "eval_accuracy": 0.889261744966443,
+ "eval_auc": 0.9085174295528148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6431847968545217,
+ "eval_f1_macro": 0.788822778783544,
+ "eval_loss": 0.28959015011787415,
+ "eval_pr_auc": 0.6152976575518759,
+ "eval_precision": 0.6536796536796536,
+ "eval_precision_macro": 0.7926964124983926,
+ "eval_pred_class_0": 16665,
+ "eval_pred_class_1": 3003,
+ "eval_predicted_binding_ratio": 0.15268456375838926,
+ "eval_recall": 0.6330216059335698,
+ "eval_recall_macro": 0.7851231045301337,
+ "eval_runtime": 0.2452,
+ "eval_samples_per_second": 664.894,
+ "eval_steps_per_second": 4.079,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1242
+ },
+ {
+ "epoch": 70.0,
+ "eval_accuracy": 0.889363432987594,
+ "eval_auc": 0.9095093658465239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6446766819072501,
+ "eval_f1_macro": 0.7895790973067505,
+ "eval_loss": 0.2887136936187744,
+ "eval_pr_auc": 0.6176593727552148,
+ "eval_precision": 0.6529937148527952,
+ "eval_precision_macro": 0.7926428472131204,
+ "eval_pred_class_0": 16645,
+ "eval_pred_class_1": 3023,
+ "eval_predicted_binding_ratio": 0.15370144396990035,
+ "eval_recall": 0.636568848758465,
+ "eval_recall_macro": 0.7866251016291872,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.817,
+ "eval_steps_per_second": 3.796,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1260
+ },
+ {
+ "epoch": 71.0,
+ "eval_accuracy": 0.889821029082774,
+ "eval_auc": 0.9104295930879169,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6464349812367434,
+ "eval_f1_macro": 0.7905888279869988,
+ "eval_loss": 0.28783899545669556,
+ "eval_pr_auc": 0.619972501272285,
+ "eval_precision": 0.6542272126816381,
+ "eval_precision_macro": 0.7934597601869728,
+ "eval_pred_class_0": 16640,
+ "eval_pred_class_1": 3028,
+ "eval_predicted_binding_ratio": 0.1539556640227781,
+ "eval_recall": 0.6388261851015802,
+ "eval_recall_macro": 0.7878141307592769,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.334,
+ "eval_steps_per_second": 3.824,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1278
+ },
+ {
+ "epoch": 72.0,
+ "eval_accuracy": 0.8899735611145008,
+ "eval_auc": 0.911281293804153,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.646289637136319,
+ "eval_f1_macro": 0.7905721170208057,
+ "eval_loss": 0.28697267174720764,
+ "eval_pr_auc": 0.6225153426830469,
+ "eval_precision": 0.6552867086509778,
+ "eval_precision_macro": 0.7938916277024632,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.6375362786198001,
+ "eval_recall_macro": 0.7873804408732489,
+ "eval_runtime": 0.2764,
+ "eval_samples_per_second": 589.762,
+ "eval_steps_per_second": 3.618,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1296
+ },
+ {
+ "epoch": 73.0,
+ "eval_accuracy": 0.890736221273134,
+ "eval_auc": 0.911925575502615,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6505122784192552,
+ "eval_f1_macro": 0.7928790035842321,
+ "eval_loss": 0.2863345444202423,
+ "eval_pr_auc": 0.6235765349975187,
+ "eval_precision": 0.6561679790026247,
+ "eval_precision_macro": 0.7949612458190018,
+ "eval_pred_class_0": 16620,
+ "eval_pred_class_1": 3048,
+ "eval_predicted_binding_ratio": 0.1549725442342892,
+ "eval_recall": 0.6449532408900355,
+ "eval_recall_macro": 0.7908474781742385,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.574,
+ "eval_steps_per_second": 3.801,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1314
+ },
+ {
+ "epoch": 74.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9126060328997004,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6506692784851453,
+ "eval_f1_macro": 0.793115428161573,
+ "eval_loss": 0.28558436036109924,
+ "eval_pr_auc": 0.625919370718976,
+ "eval_precision": 0.6588429752066116,
+ "eval_precision_macro": 0.7961342196828587,
+ "eval_pred_class_0": 16643,
+ "eval_pred_class_1": 3025,
+ "eval_predicted_binding_ratio": 0.15380313199105144,
+ "eval_recall": 0.6426959045469204,
+ "eval_recall_macro": 0.7902016976709372,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.167,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1332
+ },
+ {
+ "epoch": 75.0,
+ "eval_accuracy": 0.8916005694529184,
+ "eval_auc": 0.9132873078266985,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6526555881394591,
+ "eval_f1_macro": 0.794217425975266,
+ "eval_loss": 0.28493690490722656,
+ "eval_pr_auc": 0.6278320531638758,
+ "eval_precision": 0.6595324333223576,
+ "eval_precision_macro": 0.7967555738856391,
+ "eval_pred_class_0": 16631,
+ "eval_pred_class_1": 3037,
+ "eval_predicted_binding_ratio": 0.1544132601179581,
+ "eval_recall": 0.6459206707513705,
+ "eval_recall_macro": 0.7917537198146302,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.773,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1350
+ },
+ {
+ "epoch": 76.0,
+ "eval_accuracy": 0.8920073215375229,
+ "eval_auc": 0.9139370494570753,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6551948051948052,
+ "eval_f1_macro": 0.7955863102414826,
+ "eval_loss": 0.28430166840553284,
+ "eval_pr_auc": 0.6292546024902547,
+ "eval_precision": 0.6596927100359594,
+ "eval_precision_macro": 0.7972435493102309,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7939610311131058,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.459,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1368
+ },
+ {
+ "epoch": 77.0,
+ "eval_accuracy": 0.8921598535692495,
+ "eval_auc": 0.9146080371326758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6542787286063569,
+ "eval_f1_macro": 0.7951975553215214,
+ "eval_loss": 0.283497154712677,
+ "eval_pr_auc": 0.6315022943889131,
+ "eval_precision": 0.6615029663810151,
+ "eval_precision_macro": 0.7978670296615908,
+ "eval_pred_class_0": 16634,
+ "eval_pred_class_1": 3034,
+ "eval_predicted_binding_ratio": 0.15426072808623145,
+ "eval_recall": 0.6472105772331506,
+ "eval_recall_macro": 0.7926099364103822,
+ "eval_runtime": 0.2203,
+ "eval_samples_per_second": 740.067,
+ "eval_steps_per_second": 4.54,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1386
+ },
+ {
+ "epoch": 78.0,
+ "eval_accuracy": 0.8924140736221273,
+ "eval_auc": 0.9151027497871649,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655037495924356,
+ "eval_f1_macro": 0.7956531976941219,
+ "eval_loss": 0.2829034626483917,
+ "eval_pr_auc": 0.6331226154536788,
+ "eval_precision": 0.6623804813715793,
+ "eval_precision_macro": 0.7983678781970611,
+ "eval_pred_class_0": 16635,
+ "eval_pred_class_1": 3033,
+ "eval_predicted_binding_ratio": 0.1542098840756559,
+ "eval_recall": 0.6478555304740407,
+ "eval_recall_macro": 0.7930229544686254,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.571,
+ "eval_steps_per_second": 3.899,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1404
+ },
+ {
+ "epoch": 79.0,
+ "eval_accuracy": 0.8925157616432784,
+ "eval_auc": 0.9156585533376076,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6559244791666666,
+ "eval_f1_macro": 0.7961172166862497,
+ "eval_loss": 0.28232645988464355,
+ "eval_pr_auc": 0.6343326075351273,
+ "eval_precision": 0.6621754847190273,
+ "eval_precision_macro": 0.7984260882241754,
+ "eval_pred_class_0": 16625,
+ "eval_pred_class_1": 3043,
+ "eval_predicted_binding_ratio": 0.15471832418141143,
+ "eval_recall": 0.6497903901967107,
+ "eval_recall_macro": 0.7938696624128962,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.665,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1422
+ },
+ {
+ "epoch": 80.0,
+ "eval_accuracy": 0.8929733577384584,
+ "eval_auc": 0.9162826303682348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6572219508223416,
+ "eval_f1_macro": 0.7969043930945569,
+ "eval_loss": 0.28162533044815063,
+ "eval_pr_auc": 0.6363164977912346,
+ "eval_precision": 0.6638157894736842,
+ "eval_precision_macro": 0.7993423426560147,
+ "eval_pred_class_0": 16628,
+ "eval_pred_class_1": 3040,
+ "eval_predicted_binding_ratio": 0.15456579214968477,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.79453446021916,
+ "eval_runtime": 0.272,
+ "eval_samples_per_second": 599.266,
+ "eval_steps_per_second": 3.676,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1440
+ },
+ {
+ "epoch": 81.0,
+ "eval_accuracy": 0.8928208257067317,
+ "eval_auc": 0.9167705886684476,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.657124268054652,
+ "eval_f1_macro": 0.7968036671115732,
+ "eval_loss": 0.2811121940612793,
+ "eval_pr_auc": 0.6378571407612313,
+ "eval_precision": 0.6629471611421069,
+ "eval_precision_macro": 0.7989544782306408,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6514027732989358,
+ "eval_recall_macro": 0.7947060344432748,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.342,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1458
+ },
+ {
+ "epoch": 82.0,
+ "eval_accuracy": 0.893125889770185,
+ "eval_auc": 0.9172343422437541,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6581001951854262,
+ "eval_f1_macro": 0.7973820247953165,
+ "eval_loss": 0.2804972231388092,
+ "eval_pr_auc": 0.6394259907419034,
+ "eval_precision": 0.6639317361339022,
+ "eval_precision_macro": 0.7995370130040789,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6523702031602708,
+ "eval_recall_macro": 0.7952802908117405,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.86,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1476
+ },
+ {
+ "epoch": 83.0,
+ "eval_accuracy": 0.8933801098230628,
+ "eval_auc": 0.9176662705474707,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6597436313483693,
+ "eval_f1_macro": 0.7982647858607822,
+ "eval_loss": 0.28001976013183594,
+ "eval_pr_auc": 0.6407525699560299,
+ "eval_precision": 0.6639451338994121,
+ "eval_precision_macro": 0.7998155152816343,
+ "eval_pred_class_0": 16606,
+ "eval_pred_class_1": 3062,
+ "eval_predicted_binding_ratio": 0.15568436038234695,
+ "eval_recall": 0.6555949693647211,
+ "eval_recall_macro": 0.7967417715176355,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.967,
+ "eval_steps_per_second": 3.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1494
+ },
+ {
+ "epoch": 83.33333333333333,
+ "grad_norm": 11845.0048828125,
+ "learning_rate": 9.86567120987093e-07,
+ "loss": 0.2741,
+ "step": 1500
+ },
+ {
+ "epoch": 84.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9181069872977458,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6592015579357352,
+ "eval_f1_macro": 0.7979494857864604,
+ "eval_loss": 0.2794816195964813,
+ "eval_pr_auc": 0.6421665674351047,
+ "eval_precision": 0.6635086573015354,
+ "eval_precision_macro": 0.799538997766201,
+ "eval_pred_class_0": 16607,
+ "eval_pred_class_1": 3061,
+ "eval_predicted_binding_ratio": 0.15563351637177142,
+ "eval_recall": 0.654950016123831,
+ "eval_recall_macro": 0.7963891144179245,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.121,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1512
+ },
+ {
+ "epoch": 85.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9184344761551537,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6595330739299611,
+ "eval_f1_macro": 0.7981095181516664,
+ "eval_loss": 0.27904370427131653,
+ "eval_pr_auc": 0.6429990318434126,
+ "eval_precision": 0.6631887838278449,
+ "eval_precision_macro": 0.7994577736379149,
+ "eval_pred_class_0": 16601,
+ "eval_pred_class_1": 3067,
+ "eval_predicted_binding_ratio": 0.15593858043522474,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.796782287910794,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.288,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1530
+ },
+ {
+ "epoch": 86.0,
+ "eval_accuracy": 0.8933292658124873,
+ "eval_auc": 0.9189190864757253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6585286458333334,
+ "eval_f1_macro": 0.797660321952579,
+ "eval_loss": 0.27837634086608887,
+ "eval_pr_auc": 0.6447520419594072,
+ "eval_precision": 0.664804469273743,
+ "eval_precision_macro": 0.7999811820052926,
+ "eval_pred_class_0": 16625,
+ "eval_pred_class_1": 3043,
+ "eval_predicted_binding_ratio": 0.15471832418141143,
+ "eval_recall": 0.6523702031602708,
+ "eval_recall_macro": 0.7954010127288045,
+ "eval_runtime": 0.2401,
+ "eval_samples_per_second": 678.873,
+ "eval_steps_per_second": 4.165,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1548
+ },
+ {
+ "epoch": 87.0,
+ "eval_accuracy": 0.8936851738865161,
+ "eval_auc": 0.9193183038504471,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6602761982128351,
+ "eval_f1_macro": 0.7986291029941847,
+ "eval_loss": 0.27788689732551575,
+ "eval_pr_auc": 0.6462039154555116,
+ "eval_precision": 0.6653569089718402,
+ "eval_precision_macro": 0.8005067920325675,
+ "eval_pred_class_0": 16614,
+ "eval_pred_class_1": 3054,
+ "eval_predicted_binding_ratio": 0.15527760829774254,
+ "eval_recall": 0.655272492744276,
+ "eval_recall_macro": 0.7967917965622751,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.0,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1566
+ },
+ {
+ "epoch": 88.0,
+ "eval_accuracy": 0.8941427699816962,
+ "eval_auc": 0.9197055599839506,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.662012987012987,
+ "eval_f1_macro": 0.7996283888525269,
+ "eval_loss": 0.2774609923362732,
+ "eval_pr_auc": 0.6474319229516793,
+ "eval_precision": 0.6665576985943119,
+ "eval_precision_macro": 0.8013082309577014,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.7979808256923646,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.753,
+ "eval_steps_per_second": 4.005,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1584
+ },
+ {
+ "epoch": 89.0,
+ "eval_accuracy": 0.8946003660768761,
+ "eval_auc": 0.9201319018332661,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6636378387149116,
+ "eval_f1_macro": 0.8005736295133055,
+ "eval_loss": 0.27695581316947937,
+ "eval_pr_auc": 0.6490255599400047,
+ "eval_precision": 0.6678641410842586,
+ "eval_precision_macro": 0.8021363340613392,
+ "eval_pred_class_0": 16606,
+ "eval_pred_class_1": 3062,
+ "eval_predicted_binding_ratio": 0.15568436038234695,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.7990387969914977,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 606.045,
+ "eval_steps_per_second": 3.718,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1602
+ },
+ {
+ "epoch": 90.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.920403555344157,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6648262422864566,
+ "eval_f1_macro": 0.8013081720805965,
+ "eval_loss": 0.27657878398895264,
+ "eval_pr_auc": 0.6500340451314033,
+ "eval_precision": 0.6696107294733399,
+ "eval_precision_macro": 0.8030794000144978,
+ "eval_pred_class_0": 16611,
+ "eval_pred_class_1": 3057,
+ "eval_predicted_binding_ratio": 0.1554301403294692,
+ "eval_recall": 0.6601096420509514,
+ "eval_recall_macro": 0.7995725369668047,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.642,
+ "eval_steps_per_second": 3.832,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1620
+ },
+ {
+ "epoch": 91.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9207465675374016,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6654746423927178,
+ "eval_f1_macro": 0.8017284012349317,
+ "eval_loss": 0.27611449360847473,
+ "eval_pr_auc": 0.6512249430736048,
+ "eval_precision": 0.6709275647328745,
+ "eval_precision_macro": 0.8037492731289094,
+ "eval_pred_class_0": 16617,
+ "eval_pred_class_1": 3051,
+ "eval_predicted_binding_ratio": 0.15512507626601588,
+ "eval_recall": 0.6601096420509514,
+ "eval_recall_macro": 0.7997536198424009,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.736,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1638
+ },
+ {
+ "epoch": 92.0,
+ "eval_accuracy": 0.8954647142566605,
+ "eval_auc": 0.9211146506479597,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6662337662337663,
+ "eval_f1_macro": 0.802130627992697,
+ "eval_loss": 0.275691956281662,
+ "eval_pr_auc": 0.6524138573777828,
+ "eval_precision": 0.6708074534161491,
+ "eval_precision_macro": 0.8038244624537546,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8004692699557154,
+ "eval_runtime": 0.2702,
+ "eval_samples_per_second": 603.318,
+ "eval_steps_per_second": 3.701,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1656
+ },
+ {
+ "epoch": 93.0,
+ "eval_accuracy": 0.8956680902989628,
+ "eval_auc": 0.9214137790034065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8024110910186859,
+ "eval_loss": 0.27531710267066956,
+ "eval_pr_auc": 0.6535015844647576,
+ "eval_precision": 0.6716857610474631,
+ "eval_precision_macro": 0.8042712197761243,
+ "eval_pred_class_0": 16613,
+ "eval_pred_class_1": 3055,
+ "eval_predicted_binding_ratio": 0.15532845230831807,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8005899918727795,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.638,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1674
+ },
+ {
+ "epoch": 94.0,
+ "eval_accuracy": 0.8956172462883872,
+ "eval_auc": 0.9217489565349906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6664500406173842,
+ "eval_f1_macro": 0.802288641055505,
+ "eval_loss": 0.27483227849006653,
+ "eval_pr_auc": 0.6546621751515824,
+ "eval_precision": 0.6715782580222659,
+ "eval_precision_macro": 0.8041892734676155,
+ "eval_pred_class_0": 16614,
+ "eval_pred_class_1": 3054,
+ "eval_predicted_binding_ratio": 0.15527760829774254,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.8004287535625569,
+ "eval_runtime": 0.2382,
+ "eval_samples_per_second": 684.396,
+ "eval_steps_per_second": 4.199,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1692
+ },
+ {
+ "epoch": 95.0,
+ "eval_accuracy": 0.8959731543624161,
+ "eval_auc": 0.9220882801111304,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6672088484059857,
+ "eval_f1_macro": 0.8027800298435859,
+ "eval_loss": 0.27437010407447815,
+ "eval_pr_auc": 0.6561534178394561,
+ "eval_precision": 0.6731211027239908,
+ "eval_precision_macro": 0.8049740042228342,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.800640016917419,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.273,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1710
+ },
+ {
+ "epoch": 96.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9223952917907324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8024646720298894,
+ "eval_loss": 0.27396437525749207,
+ "eval_pr_auc": 0.6571939169041311,
+ "eval_precision": 0.6726854891661195,
+ "eval_precision_macro": 0.8046979364973901,
+ "eval_pred_class_0": 16622,
+ "eval_pred_class_1": 3046,
+ "eval_predicted_binding_ratio": 0.1548708562131381,
+ "eval_recall": 0.6607545952918413,
+ "eval_recall_macro": 0.8002873598177079,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.423,
+ "eval_steps_per_second": 3.806,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1728
+ },
+ {
+ "epoch": 97.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9226829844599047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6679655788277318,
+ "eval_f1_macro": 0.8031632457541016,
+ "eval_loss": 0.2735843360424042,
+ "eval_pr_auc": 0.6581443956834908,
+ "eval_precision": 0.6726618705035972,
+ "eval_precision_macro": 0.8049040839573975,
+ "eval_pred_class_0": 16610,
+ "eval_pred_class_1": 3058,
+ "eval_predicted_binding_ratio": 0.15548098434004473,
+ "eval_recall": 0.6633344082554015,
+ "eval_recall_macro": 0.801456544382424,
+ "eval_runtime": 0.2325,
+ "eval_samples_per_second": 701.163,
+ "eval_steps_per_second": 4.302,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1746
+ },
+ {
+ "epoch": 98.0,
+ "eval_accuracy": 0.8961765304047183,
+ "eval_auc": 0.9230679047936586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.668075422626788,
+ "eval_f1_macro": 0.8032698713905396,
+ "eval_loss": 0.2730555534362793,
+ "eval_pr_auc": 0.6597232694357793,
+ "eval_precision": 0.6735496558505408,
+ "eval_precision_macro": 0.8053010360254088,
+ "eval_pred_class_0": 16617,
+ "eval_pred_class_1": 3051,
+ "eval_predicted_binding_ratio": 0.15512507626601588,
+ "eval_recall": 0.6626894550145115,
+ "eval_recall_macro": 0.801284970158309,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.817,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1764
+ },
+ {
+ "epoch": 99.0,
+ "eval_accuracy": 0.8961256863941428,
+ "eval_auc": 0.9233850285396773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6689353427321342,
+ "eval_f1_macro": 0.8036671286252258,
+ "eval_loss": 0.272703617811203,
+ "eval_pr_auc": 0.6608360801532023,
+ "eval_precision": 0.6723127035830619,
+ "eval_precision_macro": 0.8049176483332829,
+ "eval_pred_class_0": 16598,
+ "eval_pred_class_1": 3070,
+ "eval_predicted_binding_ratio": 0.1560911124669514,
+ "eval_recall": 0.6655917445985166,
+ "eval_recall_macro": 0.8024343101576514,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.703,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1782
+ },
+ {
+ "epoch": 100.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9236704438040937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6696891191709845,
+ "eval_f1_macro": 0.804084607836397,
+ "eval_loss": 0.27227067947387695,
+ "eval_pr_auc": 0.661859432748859,
+ "eval_precision": 0.672520325203252,
+ "eval_precision_macro": 0.8051325786806955,
+ "eval_pred_class_0": 16593,
+ "eval_pred_class_1": 3075,
+ "eval_predicted_binding_ratio": 0.15634533251982916,
+ "eval_recall": 0.6668816510802967,
+ "eval_recall_macro": 0.8030490829192756,
+ "eval_runtime": 0.2555,
+ "eval_samples_per_second": 638.071,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1800
+ },
+ {
+ "epoch": 101.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9239191675474416,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6695821185617103,
+ "eval_f1_macro": 0.8040329626642457,
+ "eval_loss": 0.2719270884990692,
+ "eval_pr_auc": 0.6626733311213273,
+ "eval_precision": 0.6726326065733811,
+ "eval_precision_macro": 0.8051623412499325,
+ "eval_pred_class_0": 16595,
+ "eval_pred_class_1": 3073,
+ "eval_predicted_binding_ratio": 0.15624364449867806,
+ "eval_recall": 0.6665591744598517,
+ "eval_recall_macro": 0.802918025088319,
+ "eval_runtime": 0.2736,
+ "eval_samples_per_second": 595.794,
+ "eval_steps_per_second": 3.655,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1818
+ },
+ {
+ "epoch": 102.0,
+ "eval_accuracy": 0.896786658531625,
+ "eval_auc": 0.9242257996595844,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6714146973130463,
+ "eval_f1_macro": 0.805096334723234,
+ "eval_loss": 0.271486759185791,
+ "eval_pr_auc": 0.6637007390734015,
+ "eval_precision": 0.6740331491712708,
+ "eval_precision_macro": 0.8060660592459934,
+ "eval_pred_class_0": 16591,
+ "eval_pred_class_1": 3077,
+ "eval_predicted_binding_ratio": 0.15644702054098028,
+ "eval_recall": 0.6688165108029668,
+ "eval_recall_macro": 0.8041372346976746,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.758,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1836
+ },
+ {
+ "epoch": 103.0,
+ "eval_accuracy": 0.8973459426479561,
+ "eval_auc": 0.9244978230054357,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.673248098397799,
+ "eval_f1_macro": 0.8061779895433214,
+ "eval_loss": 0.2711206376552582,
+ "eval_pr_auc": 0.6646142778873767,
+ "eval_precision": 0.6757634827810266,
+ "eval_precision_macro": 0.8071101922645338,
+ "eval_pred_class_0": 16590,
+ "eval_pred_class_1": 3078,
+ "eval_predicted_binding_ratio": 0.15649786455155582,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.8052555669553397,
+ "eval_runtime": 0.2242,
+ "eval_samples_per_second": 727.107,
+ "eval_steps_per_second": 4.461,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1854
+ },
+ {
+ "epoch": 104.0,
+ "eval_accuracy": 0.8971934106162294,
+ "eval_auc": 0.9248346842593396,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6732385261797027,
+ "eval_f1_macro": 0.8061196854381076,
+ "eval_loss": 0.2707342207431793,
+ "eval_pr_auc": 0.6659244139495173,
+ "eval_precision": 0.6747651441528992,
+ "eval_precision_macro": 0.8066847854532062,
+ "eval_pred_class_0": 16581,
+ "eval_pred_class_1": 3087,
+ "eval_predicted_binding_ratio": 0.15695546064673582,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8055581990104113,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.197,
+ "eval_steps_per_second": 3.921,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1872
+ },
+ {
+ "epoch": 105.0,
+ "eval_accuracy": 0.8975493186902583,
+ "eval_auc": 0.9250955152313902,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6746326497658647,
+ "eval_f1_macro": 0.8069177490147248,
+ "eval_loss": 0.2704195976257324,
+ "eval_pr_auc": 0.666907608407933,
+ "eval_precision": 0.675614489003881,
+ "eval_precision_macro": 0.8072811827258788,
+ "eval_pred_class_0": 16576,
+ "eval_pred_class_1": 3092,
+ "eval_predicted_binding_ratio": 0.15720968069961358,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8065558093510122,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.977,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1890
+ },
+ {
+ "epoch": 106.0,
+ "eval_accuracy": 0.8977018507219849,
+ "eval_auc": 0.9254069649305167,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6749596122778675,
+ "eval_f1_macro": 0.8071292359343842,
+ "eval_loss": 0.26996490359306335,
+ "eval_pr_auc": 0.6681450085536085,
+ "eval_precision": 0.6762706377468436,
+ "eval_precision_macro": 0.8076147808433838,
+ "eval_pred_class_0": 16579,
+ "eval_pred_class_1": 3089,
+ "eval_predicted_binding_ratio": 0.15705714866788692,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8066463507888102,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.086,
+ "eval_steps_per_second": 3.749,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1908
+ },
+ {
+ "epoch": 107.0,
+ "eval_accuracy": 0.8980577587960138,
+ "eval_auc": 0.9256414889578861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6758286176232822,
+ "eval_f1_macro": 0.8076738937412058,
+ "eval_loss": 0.2696084678173065,
+ "eval_pr_auc": 0.6691135839215693,
+ "eval_precision": 0.6776913099870299,
+ "eval_precision_macro": 0.8083644683075526,
+ "eval_pred_class_0": 16584,
+ "eval_pred_class_1": 3084,
+ "eval_predicted_binding_ratio": 0.15680292861500916,
+ "eval_recall": 0.673976136730087,
+ "eval_recall_macro": 0.8069886719746289,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.158,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1926
+ },
+ {
+ "epoch": 108.0,
+ "eval_accuracy": 0.8980069147854383,
+ "eval_auc": 0.9258192627838369,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6769726247987118,
+ "eval_f1_macro": 0.8082079811791663,
+ "eval_loss": 0.26944610476493835,
+ "eval_pr_auc": 0.6696389857739906,
+ "eval_precision": 0.676101640398842,
+ "eval_precision_macro": 0.8078859551713395,
+ "eval_pred_class_0": 16559,
+ "eval_pred_class_1": 3109,
+ "eval_predicted_binding_ratio": 0.158074028879398,
+ "eval_recall": 0.6778458561754273,
+ "eval_recall_macro": 0.8085311854668409,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.512,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1944
+ },
+ {
+ "epoch": 109.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9261110139050743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.676779086654833,
+ "eval_f1_macro": 0.8081683537924276,
+ "eval_loss": 0.26896923780441284,
+ "eval_pr_auc": 0.6708410126864026,
+ "eval_precision": 0.6773255813953488,
+ "eval_precision_macro": 0.8083707318031536,
+ "eval_pred_class_0": 16572,
+ "eval_pred_class_1": 3096,
+ "eval_predicted_binding_ratio": 0.1574130567419158,
+ "eval_recall": 0.6762334730732021,
+ "eval_recall_macro": 0.8079664377498563,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 632.096,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1962
+ },
+ {
+ "epoch": 110.0,
+ "eval_accuracy": 0.8985153548911938,
+ "eval_auc": 0.9263511243868452,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6783757653883339,
+ "eval_f1_macro": 0.809064127789247,
+ "eval_loss": 0.26861709356307983,
+ "eval_pr_auc": 0.6718712574127733,
+ "eval_precision": 0.677938808373591,
+ "eval_precision_macro": 0.808902387342021,
+ "eval_pred_class_0": 16563,
+ "eval_pred_class_1": 3105,
+ "eval_predicted_binding_ratio": 0.15787065283709578,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.8092261637523704,
+ "eval_runtime": 0.2543,
+ "eval_samples_per_second": 640.936,
+ "eval_steps_per_second": 3.932,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1980
+ },
+ {
+ "epoch": 111.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9265832055569767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6794644297467334,
+ "eval_f1_macro": 0.8097506232989936,
+ "eval_loss": 0.2682516574859619,
+ "eval_pr_auc": 0.6727910026400046,
+ "eval_precision": 0.6797934151065204,
+ "eval_precision_macro": 0.8098725675411902,
+ "eval_pred_class_0": 16570,
+ "eval_pred_class_1": 3098,
+ "eval_predicted_binding_ratio": 0.1575147447630669,
+ "eval_recall": 0.6791357626572073,
+ "eval_recall_macro": 0.809628845896721,
+ "eval_runtime": 0.2091,
+ "eval_samples_per_second": 779.644,
+ "eval_steps_per_second": 4.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1998
+ },
+ {
+ "epoch": 111.11111111111111,
+ "grad_norm": 13330.4609375,
+ "learning_rate": 9.552616846852138e-07,
+ "loss": 0.252,
+ "step": 2000
+ },
+ {
+ "epoch": 112.0,
+ "eval_accuracy": 0.899176327028676,
+ "eval_auc": 0.9269119888367457,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6793856103476152,
+ "eval_f1_macro": 0.8097842051315767,
+ "eval_loss": 0.2677896022796631,
+ "eval_pr_auc": 0.6743175064299574,
+ "eval_precision": 0.6812581063553826,
+ "eval_precision_macro": 0.8104795114507255,
+ "eval_pred_class_0": 16584,
+ "eval_pred_class_1": 3084,
+ "eval_predicted_binding_ratio": 0.15680292861500916,
+ "eval_recall": 0.6775233795549823,
+ "eval_recall_macro": 0.8090942786590025,
+ "eval_runtime": 0.2454,
+ "eval_samples_per_second": 664.193,
+ "eval_steps_per_second": 4.075,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2016
+ },
+ {
+ "epoch": 113.0,
+ "eval_accuracy": 0.8994813910921293,
+ "eval_auc": 0.927058521341044,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6812832500403031,
+ "eval_f1_macro": 0.8108073208521016,
+ "eval_loss": 0.26760444045066833,
+ "eval_pr_auc": 0.6746134464200654,
+ "eval_precision": 0.6811734364925854,
+ "eval_precision_macro": 0.8107666047608406,
+ "eval_pred_class_0": 16566,
+ "eval_pred_class_1": 3102,
+ "eval_predicted_binding_ratio": 0.15771812080536912,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.8108480555060766,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.901,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2034
+ },
+ {
+ "epoch": 114.0,
+ "eval_accuracy": 0.8993797030709783,
+ "eval_auc": 0.9272620862892311,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6811664250040277,
+ "eval_f1_macro": 0.81071512110173,
+ "eval_loss": 0.267299622297287,
+ "eval_pr_auc": 0.6753372489001316,
+ "eval_precision": 0.6806181584030908,
+ "eval_precision_macro": 0.8105119532505733,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8109187523785011,
+ "eval_runtime": 0.2924,
+ "eval_samples_per_second": 557.453,
+ "eval_steps_per_second": 3.42,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2052
+ },
+ {
+ "epoch": 115.0,
+ "eval_accuracy": 0.8994813910921293,
+ "eval_auc": 0.9274648142425077,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6817962337035248,
+ "eval_f1_macro": 0.8110548055574955,
+ "eval_loss": 0.2670327126979828,
+ "eval_pr_auc": 0.6759104665767571,
+ "eval_precision": 0.6805912596401028,
+ "eval_precision_macro": 0.8106085073266955,
+ "eval_pred_class_0": 16556,
+ "eval_pred_class_1": 3112,
+ "eval_predicted_binding_ratio": 0.15822656091112466,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.811503344660859,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.841,
+ "eval_steps_per_second": 3.999,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2070
+ },
+ {
+ "epoch": 116.0,
+ "eval_accuracy": 0.8995830791132805,
+ "eval_auc": 0.927707601161492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6824248271426274,
+ "eval_f1_macro": 0.8113938913621764,
+ "eval_loss": 0.2667410373687744,
+ "eval_pr_auc": 0.676645416517246,
+ "eval_precision": 0.6805644644002565,
+ "eval_precision_macro": 0.8107051929252038,
+ "eval_pred_class_0": 16550,
+ "eval_pred_class_1": 3118,
+ "eval_predicted_binding_ratio": 0.15853162497457798,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.812087936943217,
+ "eval_runtime": 0.2676,
+ "eval_samples_per_second": 609.094,
+ "eval_steps_per_second": 3.737,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2088
+ },
+ {
+ "epoch": 117.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.927944061956154,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6828796907714608,
+ "eval_f1_macro": 0.8117208850210732,
+ "eval_loss": 0.26635268330574036,
+ "eval_pr_auc": 0.6777081363329963,
+ "eval_precision": 0.6821106821106822,
+ "eval_precision_macro": 0.8114357758379498,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8120069041569002,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.137,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2106
+ },
+ {
+ "epoch": 118.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9282739839383012,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6831315577078289,
+ "eval_f1_macro": 0.8119498952052617,
+ "eval_loss": 0.2659379541873932,
+ "eval_pr_auc": 0.6790830976589266,
+ "eval_precision": 0.6839043309631545,
+ "eval_precision_macro": 0.8122369488772572,
+ "eval_pred_class_0": 16574,
+ "eval_pred_class_1": 3094,
+ "eval_predicted_binding_ratio": 0.1573113687207647,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8116637557086703,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.745,
+ "eval_steps_per_second": 3.857,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2124
+ },
+ {
+ "epoch": 119.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9284926050623749,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6838460299565148,
+ "eval_f1_macro": 0.812294615183528,
+ "eval_loss": 0.2656570076942444,
+ "eval_pr_auc": 0.6798372835892805,
+ "eval_precision": 0.6830759330759331,
+ "eval_precision_macro": 0.8120089810307202,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.8125811605253657,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.824,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2142
+ },
+ {
+ "epoch": 120.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9287595480437707,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6841512469831054,
+ "eval_f1_macro": 0.8124418563951485,
+ "eval_loss": 0.26531943678855896,
+ "eval_pr_auc": 0.6808986584956077,
+ "eval_precision": 0.682723185613359,
+ "eval_precision_macro": 0.8119125170545953,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.6855852950661077,
+ "eval_recall_macro": 0.8129743340182352,
+ "eval_runtime": 0.2686,
+ "eval_samples_per_second": 606.857,
+ "eval_steps_per_second": 3.723,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2160
+ },
+ {
+ "epoch": 121.0,
+ "eval_accuracy": 0.9005999593247915,
+ "eval_auc": 0.9289936341086871,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6851344822032533,
+ "eval_f1_macro": 0.8130595887334677,
+ "eval_loss": 0.26495063304901123,
+ "eval_pr_auc": 0.6818525990139518,
+ "eval_precision": 0.6843629343629344,
+ "eval_precision_macro": 0.8127732546210807,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6859077716865527,
+ "eval_recall_macro": 0.8133468356833198,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.159,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2178
+ },
+ {
+ "epoch": 122.0,
+ "eval_accuracy": 0.9010067114093959,
+ "eval_auc": 0.9291471740122346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6880307643005928,
+ "eval_f1_macro": 0.8146000626156236,
+ "eval_loss": 0.2648627460002899,
+ "eval_pr_auc": 0.6821768129155847,
+ "eval_precision": 0.6837579617834395,
+ "eval_precision_macro": 0.8130188647252145,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.692357304095453,
+ "eval_recall_macro": 0.8162094361365779,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.373,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2196
+ },
+ {
+ "epoch": 123.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9293803062922532,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.687851067244423,
+ "eval_f1_macro": 0.8145493064661928,
+ "eval_loss": 0.26444998383522034,
+ "eval_pr_auc": 0.6832369783380787,
+ "eval_precision": 0.6846645367412141,
+ "eval_precision_macro": 0.8133686693864494,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8157455657712839,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.052,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2214
+ },
+ {
+ "epoch": 124.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.929644952403895,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6885774125986789,
+ "eval_f1_macro": 0.815114870687036,
+ "eval_loss": 0.26401567459106445,
+ "eval_pr_auc": 0.6842971435384069,
+ "eval_precision": 0.6880231809401159,
+ "eval_precision_macro": 0.8149088251035563,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8153213845367371,
+ "eval_runtime": 0.2233,
+ "eval_samples_per_second": 729.933,
+ "eval_steps_per_second": 4.478,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2232
+ },
+ {
+ "epoch": 125.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.9298876614628875,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6892782510850346,
+ "eval_f1_macro": 0.8154529561328843,
+ "eval_loss": 0.26374292373657227,
+ "eval_pr_auc": 0.6852698748697685,
+ "eval_precision": 0.6871794871794872,
+ "eval_precision_macro": 0.8146738625165021,
+ "eval_pred_class_0": 16548,
+ "eval_pred_class_1": 3120,
+ "eval_predicted_binding_ratio": 0.1586333129957291,
+ "eval_recall": 0.691389874234118,
+ "eval_recall_macro": 0.8162387893534325,
+ "eval_runtime": 0.2352,
+ "eval_samples_per_second": 692.981,
+ "eval_steps_per_second": 4.251,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2250
+ },
+ {
+ "epoch": 126.0,
+ "eval_accuracy": 0.9020235916209071,
+ "eval_auc": 0.9300761410376911,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909382518043304,
+ "eval_f1_macro": 0.8163612439650636,
+ "eval_loss": 0.2635449767112732,
+ "eval_pr_auc": 0.685931037905785,
+ "eval_precision": 0.6873005743458839,
+ "eval_precision_macro": 0.815012329026093,
+ "eval_pred_class_0": 16534,
+ "eval_pred_class_1": 3134,
+ "eval_predicted_binding_ratio": 0.15934512914378687,
+ "eval_recall": 0.6946146404385682,
+ "eval_recall_macro": 0.8177304505385936,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.44,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2268
+ },
+ {
+ "epoch": 127.0,
+ "eval_accuracy": 0.902837095790116,
+ "eval_auc": 0.9303049910181687,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6925181013676589,
+ "eval_f1_macro": 0.8174102840403102,
+ "eval_loss": 0.263118714094162,
+ "eval_pr_auc": 0.6869422132706717,
+ "eval_precision": 0.691072575465639,
+ "eval_precision_macro": 0.8168725206674576,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.6939696871976782,
+ "eval_recall_macro": 0.8179512225449368,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 630.976,
+ "eval_steps_per_second": 3.871,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2286
+ },
+ {
+ "epoch": 128.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9304756990498765,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6929866795056974,
+ "eval_f1_macro": 0.8176004232749752,
+ "eval_loss": 0.2629205286502838,
+ "eval_pr_auc": 0.6875064110834537,
+ "eval_precision": 0.689776357827476,
+ "eval_precision_macro": 0.8164083143593783,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6962270235407932,
+ "eval_recall_macro": 0.8188082664031002,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.934,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2304
+ },
+ {
+ "epoch": 129.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9306633221647718,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6937530110807772,
+ "eval_f1_macro": 0.818077689508494,
+ "eval_loss": 0.2625824213027954,
+ "eval_pr_auc": 0.6881187823465719,
+ "eval_precision": 0.690978886756238,
+ "eval_precision_macro": 0.8170466915947796,
+ "eval_pred_class_0": 16542,
+ "eval_pred_class_1": 3126,
+ "eval_predicted_binding_ratio": 0.15893837705918243,
+ "eval_recall": 0.6965495001612383,
+ "eval_recall_macro": 0.8191204071096527,
+ "eval_runtime": 0.2718,
+ "eval_samples_per_second": 599.717,
+ "eval_steps_per_second": 3.679,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2322
+ },
+ {
+ "epoch": 130.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9308322783466673,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933333333333334,
+ "eval_f1_macro": 0.817839388722781,
+ "eval_loss": 0.2622954547405243,
+ "eval_pr_auc": 0.6887035254510873,
+ "eval_precision": 0.6907810499359796,
+ "eval_precision_macro": 0.816890766747487,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.6959045469203483,
+ "eval_recall_macro": 0.8187979304892078,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.881,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2340
+ },
+ {
+ "epoch": 131.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9309924460820045,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6929866795056974,
+ "eval_f1_macro": 0.8176004232749752,
+ "eval_loss": 0.26203182339668274,
+ "eval_pr_auc": 0.6893090005690568,
+ "eval_precision": 0.689776357827476,
+ "eval_precision_macro": 0.8164083143593783,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6962270235407932,
+ "eval_recall_macro": 0.8188082664031002,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.443,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2358
+ },
+ {
+ "epoch": 132.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9312000694822565,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6936286310383566,
+ "eval_f1_macro": 0.817981812876073,
+ "eval_loss": 0.26177045702934265,
+ "eval_pr_auc": 0.6902838377634022,
+ "eval_precision": 0.6904153354632588,
+ "eval_precision_macro": 0.8167882699809945,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6968719767816833,
+ "eval_recall_macro": 0.8191911039820772,
+ "eval_runtime": 0.2756,
+ "eval_samples_per_second": 591.34,
+ "eval_steps_per_second": 3.628,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2376
+ },
+ {
+ "epoch": 133.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9314264084780033,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933333333333334,
+ "eval_f1_macro": 0.817839388722781,
+ "eval_loss": 0.2614164650440216,
+ "eval_pr_auc": 0.6912123921690412,
+ "eval_precision": 0.6907810499359796,
+ "eval_precision_macro": 0.816890766747487,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.6959045469203483,
+ "eval_recall_macro": 0.8187979304892078,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.769,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2394
+ },
+ {
+ "epoch": 134.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9316330780933575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6934576434656807,
+ "eval_f1_macro": 0.8179352236654993,
+ "eval_loss": 0.26109954714775085,
+ "eval_pr_auc": 0.6919150376493911,
+ "eval_precision": 0.6913461538461538,
+ "eval_precision_macro": 0.817149992562429,
+ "eval_pred_class_0": 16548,
+ "eval_pred_class_1": 3120,
+ "eval_predicted_binding_ratio": 0.1586333129957291,
+ "eval_recall": 0.6955820702999033,
+ "eval_recall_macro": 0.8187272336167832,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.557,
+ "eval_steps_per_second": 3.844,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2412
+ },
+ {
+ "epoch": 135.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9318176062735843,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.694733472066592,
+ "eval_f1_macro": 0.8185505131193367,
+ "eval_loss": 0.2609698474407196,
+ "eval_pr_auc": 0.69244594350898,
+ "eval_precision": 0.6897647806738716,
+ "eval_precision_macro": 0.8167078351983328,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.6997742663656885,
+ "eval_recall_macro": 0.8204309854192178,
+ "eval_runtime": 0.2491,
+ "eval_samples_per_second": 654.333,
+ "eval_steps_per_second": 4.014,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2430
+ },
+ {
+ "epoch": 136.0,
+ "eval_accuracy": 0.9034472239170226,
+ "eval_auc": 0.9320372006475538,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6941536479304236,
+ "eval_f1_macro": 0.8184144035830462,
+ "eval_loss": 0.2604828178882599,
+ "eval_pr_auc": 0.6935318303087233,
+ "eval_precision": 0.6933719433719434,
+ "eval_precision_macro": 0.8181231697536046,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6949371170590132,
+ "eval_recall_macro": 0.8187065617889984,
+ "eval_runtime": 0.2485,
+ "eval_samples_per_second": 655.847,
+ "eval_steps_per_second": 4.024,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2448
+ },
+ {
+ "epoch": 137.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9321443165310757,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6960972488803583,
+ "eval_f1_macro": 0.8193338378363828,
+ "eval_loss": 0.2604370415210724,
+ "eval_pr_auc": 0.693643099537468,
+ "eval_precision": 0.6905744208187877,
+ "eval_precision_macro": 0.8172857573610195,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.8214285957598189,
+ "eval_runtime": 0.2215,
+ "eval_samples_per_second": 736.024,
+ "eval_steps_per_second": 4.515,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2466
+ },
+ {
+ "epoch": 138.0,
+ "eval_accuracy": 0.9034980679275981,
+ "eval_auc": 0.9323432098797633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695638229634381,
+ "eval_f1_macro": 0.8191484199531422,
+ "eval_loss": 0.2601032257080078,
+ "eval_pr_auc": 0.694473981068256,
+ "eval_precision": 0.691866028708134,
+ "eval_precision_macro": 0.817746962215919,
+ "eval_pred_class_0": 16533,
+ "eval_pred_class_1": 3135,
+ "eval_predicted_binding_ratio": 0.1593959731543624,
+ "eval_recall": 0.6994517897452435,
+ "eval_recall_macro": 0.8205715519016554,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.928,
+ "eval_steps_per_second": 3.858,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2484
+ },
+ {
+ "epoch": 138.88888888888889,
+ "grad_norm": 12954.3583984375,
+ "learning_rate": 9.068887706579789e-07,
+ "loss": 0.2385,
+ "step": 2500
+ },
+ {
+ "epoch": 139.0,
+ "eval_accuracy": 0.904311572096807,
+ "eval_auc": 0.9325454122780963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6969404186795491,
+ "eval_f1_macro": 0.8200635197304043,
+ "eval_loss": 0.25969693064689636,
+ "eval_pr_auc": 0.6954050242581626,
+ "eval_precision": 0.6960437439691219,
+ "eval_precision_macro": 0.819729100681946,
+ "eval_pred_class_0": 16559,
+ "eval_pred_class_1": 3109,
+ "eval_predicted_binding_ratio": 0.158074028879398,
+ "eval_recall": 0.6978394066430184,
+ "eval_recall_macro": 0.820399150415129,
+ "eval_runtime": 0.2657,
+ "eval_samples_per_second": 613.439,
+ "eval_steps_per_second": 3.763,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2502
+ },
+ {
+ "epoch": 140.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9326487714170208,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971977582065653,
+ "eval_f1_macro": 0.8200261554019741,
+ "eval_loss": 0.2596379518508911,
+ "eval_pr_auc": 0.6956168134976223,
+ "eval_precision": 0.6924300254452926,
+ "eval_precision_macro": 0.8182556808417458,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8218312779041694,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 626.028,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2520
+ },
+ {
+ "epoch": 141.0,
+ "eval_accuracy": 0.904311572096807,
+ "eval_auc": 0.9329799294265357,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6962556488056811,
+ "eval_f1_macro": 0.819733135205496,
+ "eval_loss": 0.25904589891433716,
+ "eval_pr_auc": 0.6970902220334548,
+ "eval_precision": 0.6969305331179322,
+ "eval_precision_macro": 0.8199852086334245,
+ "eval_pred_class_0": 16573,
+ "eval_pred_class_1": 3095,
+ "eval_predicted_binding_ratio": 0.15736221273134024,
+ "eval_recall": 0.6955820702999033,
+ "eval_recall_macro": 0.8194817455984336,
+ "eval_runtime": 0.2524,
+ "eval_samples_per_second": 645.771,
+ "eval_steps_per_second": 3.962,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2538
+ },
+ {
+ "epoch": 142.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9330630060376336,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994061948322902,
+ "eval_f1_macro": 0.8214143192859533,
+ "eval_loss": 0.2590080201625824,
+ "eval_pr_auc": 0.697179333543334,
+ "eval_precision": 0.6961661341853035,
+ "eval_precision_macro": 0.8202078705755396,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.7026765559496937,
+ "eval_recall_macro": 0.8226366421928706,
+ "eval_runtime": 0.2495,
+ "eval_samples_per_second": 653.345,
+ "eval_steps_per_second": 4.008,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2556
+ },
+ {
+ "epoch": 143.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9332962551076398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991164658634538,
+ "eval_f1_macro": 0.8212745809731632,
+ "eval_loss": 0.25865858793258667,
+ "eval_pr_auc": 0.6981950669404262,
+ "eval_precision": 0.6965428937259923,
+ "eval_precision_macro": 0.8203156925109651,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.8222434687000011,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 619.149,
+ "eval_steps_per_second": 3.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2574
+ },
+ {
+ "epoch": 144.0,
+ "eval_accuracy": 0.9050233882448647,
+ "eval_auc": 0.9334749729859892,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995818591186876,
+ "eval_f1_macro": 0.8215887434369935,
+ "eval_loss": 0.2584296464920044,
+ "eval_pr_auc": 0.6986932637956595,
+ "eval_precision": 0.697786333012512,
+ "eval_precision_macro": 0.8209190259709409,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8222633132653747,
+ "eval_runtime": 0.2769,
+ "eval_samples_per_second": 588.703,
+ "eval_steps_per_second": 3.612,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2592
+ },
+ {
+ "epoch": 145.0,
+ "eval_accuracy": 0.9050742322554403,
+ "eval_auc": 0.9336868786857826,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991136180499598,
+ "eval_f1_macro": 0.8213807805320277,
+ "eval_loss": 0.25803840160369873,
+ "eval_pr_auc": 0.699660351667112,
+ "eval_precision": 0.6987757731958762,
+ "eval_precision_macro": 0.8212545854629465,
+ "eval_pred_class_0": 16564,
+ "eval_pred_class_1": 3104,
+ "eval_predicted_binding_ratio": 0.15781980882652025,
+ "eval_recall": 0.6994517897452435,
+ "eval_recall_macro": 0.8215071467589017,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.163,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2610
+ },
+ {
+ "epoch": 146.0,
+ "eval_accuracy": 0.9047183241814114,
+ "eval_auc": 0.9337996783486955,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995831997435076,
+ "eval_f1_macro": 0.8214817322060338,
+ "eval_loss": 0.2579362094402313,
+ "eval_pr_auc": 0.6999214829412527,
+ "eval_precision": 0.6955690149824674,
+ "eval_precision_macro": 0.8199882459220607,
+ "eval_pred_class_0": 16531,
+ "eval_pred_class_1": 3137,
+ "eval_predicted_binding_ratio": 0.15949766117551353,
+ "eval_recall": 0.7036439858110287,
+ "eval_recall_macro": 0.8229996352064741,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.317,
+ "eval_steps_per_second": 3.941,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2628
+ },
+ {
+ "epoch": 147.0,
+ "eval_accuracy": 0.9051759202765914,
+ "eval_auc": 0.9339161666287131,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.701647736362182,
+ "eval_f1_macro": 0.8226388900943448,
+ "eval_loss": 0.2578524649143219,
+ "eval_pr_auc": 0.7001081914566338,
+ "eval_precision": 0.6961904761904761,
+ "eval_precision_macro": 0.820610070399391,
+ "eval_pred_class_0": 16518,
+ "eval_pred_class_1": 3150,
+ "eval_predicted_binding_ratio": 0.16015863331299574,
+ "eval_recall": 0.7071912286359239,
+ "eval_recall_macro": 0.8247128956603896,
+ "eval_runtime": 0.2529,
+ "eval_samples_per_second": 644.43,
+ "eval_steps_per_second": 3.954,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2646
+ },
+ {
+ "epoch": 148.0,
+ "eval_accuracy": 0.9050233882448647,
+ "eval_auc": 0.9341703697689739,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995818591186876,
+ "eval_f1_macro": 0.8215887434369935,
+ "eval_loss": 0.25731131434440613,
+ "eval_pr_auc": 0.7012902373057504,
+ "eval_precision": 0.697786333012512,
+ "eval_precision_macro": 0.8209190259709409,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8222633132653747,
+ "eval_runtime": 0.2512,
+ "eval_samples_per_second": 648.858,
+ "eval_steps_per_second": 3.981,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2664
+ },
+ {
+ "epoch": 149.0,
+ "eval_accuracy": 0.9052776082977425,
+ "eval_auc": 0.9343408636857047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003377834968635,
+ "eval_f1_macro": 0.8220430425380088,
+ "eval_loss": 0.25706911087036133,
+ "eval_pr_auc": 0.7018656885391451,
+ "eval_precision": 0.6986521181001284,
+ "eval_precision_macro": 0.8214140242506442,
+ "eval_pred_class_0": 16552,
+ "eval_pred_class_1": 3116,
+ "eval_predicted_binding_ratio": 0.1584299369534269,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8226763313236177,
+ "eval_runtime": 0.2513,
+ "eval_samples_per_second": 648.672,
+ "eval_steps_per_second": 3.98,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2682
+ },
+ {
+ "epoch": 150.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9345392996069414,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7006758931445124,
+ "eval_f1_macro": 0.822259931959612,
+ "eval_loss": 0.2567782402038574,
+ "eval_pr_auc": 0.7026441101697649,
+ "eval_precision": 0.6993254095727593,
+ "eval_precision_macro": 0.8217557280421937,
+ "eval_pred_class_0": 16555,
+ "eval_pred_class_1": 3113,
+ "eval_predicted_binding_ratio": 0.15827740492170023,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8227668727614157,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.188,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2700
+ },
+ {
+ "epoch": 151.0,
+ "eval_accuracy": 0.9051250762660159,
+ "eval_auc": 0.934685296823797,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003853564547207,
+ "eval_f1_macro": 0.8220121780461352,
+ "eval_loss": 0.25664329528808594,
+ "eval_pr_auc": 0.7029644506395569,
+ "eval_precision": 0.6974736168851935,
+ "eval_precision_macro": 0.8209271234175075,
+ "eval_pred_class_0": 16541,
+ "eval_pred_class_1": 3127,
+ "eval_predicted_binding_ratio": 0.158989221069758,
+ "eval_recall": 0.7033215091905837,
+ "eval_recall_macro": 0.8231100212096456,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.856,
+ "eval_steps_per_second": 4.152,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2718
+ },
+ {
+ "epoch": 152.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9349076649599691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008216529724505,
+ "eval_f1_macro": 0.8223840221758023,
+ "eval_loss": 0.25626465678215027,
+ "eval_pr_auc": 0.7040942732775656,
+ "eval_precision": 0.7002575660012879,
+ "eval_precision_macro": 0.82217322207805,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8225952985373008,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.699,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2736
+ },
+ {
+ "epoch": 153.0,
+ "eval_accuracy": 0.905226764287167,
+ "eval_auc": 0.9350594919437002,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7014734144778988,
+ "eval_f1_macro": 0.8225728005545544,
+ "eval_loss": 0.2562600076198578,
+ "eval_pr_auc": 0.7046022469135804,
+ "eval_precision": 0.6967865097041044,
+ "eval_precision_macro": 0.8208289583316286,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7062237987745889,
+ "eval_recall_macro": 0.8243499026467862,
+ "eval_runtime": 0.2358,
+ "eval_samples_per_second": 691.279,
+ "eval_steps_per_second": 4.241,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2754
+ },
+ {
+ "epoch": 154.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9352728672508359,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7010773436243769,
+ "eval_f1_macro": 0.8224715159707777,
+ "eval_loss": 0.25581732392311096,
+ "eval_pr_auc": 0.7056629719926804,
+ "eval_precision": 0.6991661321359846,
+ "eval_precision_macro": 0.821758292654095,
+ "eval_pred_class_0": 16550,
+ "eval_pred_class_1": 3118,
+ "eval_predicted_binding_ratio": 0.15853162497457798,
+ "eval_recall": 0.7029990325701386,
+ "eval_recall_macro": 0.8231902267335512,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.5,
+ "eval_steps_per_second": 3.844,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2772
+ },
+ {
+ "epoch": 155.0,
+ "eval_accuracy": 0.9057860484034981,
+ "eval_auc": 0.9354356530283926,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7025204687750842,
+ "eval_f1_macro": 0.8232752161134611,
+ "eval_loss": 0.25560733675956726,
+ "eval_pr_auc": 0.7062858637533224,
+ "eval_precision": 0.6994884910485933,
+ "eval_precision_macro": 0.8221444873622652,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.7055788455336988,
+ "eval_recall_macro": 0.8244197722567993,
+ "eval_runtime": 0.2671,
+ "eval_samples_per_second": 610.27,
+ "eval_steps_per_second": 3.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2790
+ },
+ {
+ "epoch": 156.0,
+ "eval_accuracy": 0.9058877364246491,
+ "eval_auc": 0.9356189159837551,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7028415475999358,
+ "eval_f1_macro": 0.8234659606184656,
+ "eval_loss": 0.2553412616252899,
+ "eval_pr_auc": 0.7070636650718337,
+ "eval_precision": 0.6998081841432225,
+ "eval_precision_macro": 0.8223345636556499,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.7059013221541438,
+ "eval_recall_macro": 0.8246111910462879,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 642.017,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2808
+ },
+ {
+ "epoch": 157.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9357918624902231,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7031199742682535,
+ "eval_f1_macro": 0.8236899466727462,
+ "eval_loss": 0.25509998202323914,
+ "eval_pr_auc": 0.7077594222055618,
+ "eval_precision": 0.7013153673403913,
+ "eval_precision_macro": 0.8230158493399438,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7049338922928088,
+ "eval_recall_macro": 0.8243689199497484,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.309,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2826
+ },
+ {
+ "epoch": 158.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9359727409833409,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7034593724859212,
+ "eval_f1_macro": 0.8239074586532139,
+ "eval_loss": 0.2548539340496063,
+ "eval_pr_auc": 0.7084539339673516,
+ "eval_precision": 0.7019910083493899,
+ "eval_precision_macro": 0.8233586792381238,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.7049338922928088,
+ "eval_recall_macro": 0.8244594613875464,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.242,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2844
+ },
+ {
+ "epoch": 159.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9360812778117108,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7043380822794941,
+ "eval_f1_macro": 0.8242594639388645,
+ "eval_loss": 0.2548294961452484,
+ "eval_pr_auc": 0.708809734771939,
+ "eval_precision": 0.6993006993006993,
+ "eval_precision_macro": 0.822383674913635,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.709448564979039,
+ "eval_recall_macro": 0.8261735491038733,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.758,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2862
+ },
+ {
+ "epoch": 160.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9362697476540152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7040308334671591,
+ "eval_f1_macro": 0.82418310527748,
+ "eval_loss": 0.25451889634132385,
+ "eval_pr_auc": 0.70979034812957,
+ "eval_precision": 0.7012156110044786,
+ "eval_precision_macro": 0.8231322886360805,
+ "eval_pred_class_0": 16542,
+ "eval_pred_class_1": 3126,
+ "eval_predicted_binding_ratio": 0.15893837705918243,
+ "eval_recall": 0.7068687520154788,
+ "eval_recall_macro": 0.8252458083732854,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.302,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2880
+ },
+ {
+ "epoch": 161.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9364340127714132,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7047863796980405,
+ "eval_f1_macro": 0.824637224883753,
+ "eval_loss": 0.25425252318382263,
+ "eval_pr_auc": 0.7104452483887299,
+ "eval_precision": 0.70208,
+ "eval_precision_macro": 0.8236265925164723,
+ "eval_pred_class_0": 16543,
+ "eval_pred_class_1": 3125,
+ "eval_predicted_binding_ratio": 0.15888753304860687,
+ "eval_recall": 0.7075137052563689,
+ "eval_recall_macro": 0.8256588264315284,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.767,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2898
+ },
+ {
+ "epoch": 162.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9366581522223957,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7045965927354548,
+ "eval_f1_macro": 0.8245456841795291,
+ "eval_loss": 0.25392982363700867,
+ "eval_pr_auc": 0.7114160919166963,
+ "eval_precision": 0.7023389939122077,
+ "eval_precision_macro": 0.8237022823552698,
+ "eval_pred_class_0": 16547,
+ "eval_pred_class_1": 3121,
+ "eval_predicted_binding_ratio": 0.15868415700630464,
+ "eval_recall": 0.7068687520154788,
+ "eval_recall_macro": 0.8253967107696154,
+ "eval_runtime": 0.2487,
+ "eval_samples_per_second": 655.473,
+ "eval_steps_per_second": 4.021,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2916
+ },
+ {
+ "epoch": 163.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.936698911928028,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7069544364508393,
+ "eval_f1_macro": 0.8257724934589374,
+ "eval_loss": 0.25399070978164673,
+ "eval_pr_auc": 0.7113652786898896,
+ "eval_precision": 0.7010145846544071,
+ "eval_precision_macro": 0.8235604593370134,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7129958078039342,
+ "eval_recall_macro": 0.8280377119541189,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.019,
+ "eval_steps_per_second": 3.969,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2934
+ },
+ {
+ "epoch": 164.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9369248810888143,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7062780269058296,
+ "eval_f1_macro": 0.8254283885284618,
+ "eval_loss": 0.25362086296081543,
+ "eval_pr_auc": 0.7123203296069245,
+ "eval_precision": 0.7015590200445434,
+ "eval_precision_macro": 0.8236690712930735,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7110609480812641,
+ "eval_recall_macro": 0.827221184489114,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.444,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2952
+ },
+ {
+ "epoch": 165.0,
+ "eval_accuracy": 0.9072096806996136,
+ "eval_auc": 0.9371562809840187,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7070155723230053,
+ "eval_f1_macro": 0.8259456391835222,
+ "eval_loss": 0.2532632350921631,
+ "eval_pr_auc": 0.7133697274093473,
+ "eval_precision": 0.7039641943734015,
+ "eval_precision_macro": 0.8248055554696512,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.710093518219929,
+ "eval_recall_macro": 0.8270996353096386,
+ "eval_runtime": 0.2545,
+ "eval_samples_per_second": 640.417,
+ "eval_steps_per_second": 3.929,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2970
+ },
+ {
+ "epoch": 166.0,
+ "eval_accuracy": 0.9071588366890381,
+ "eval_auc": 0.9372476886142239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7072779737095223,
+ "eval_f1_macro": 0.8260542385315996,
+ "eval_loss": 0.2531469762325287,
+ "eval_pr_auc": 0.7136781835058345,
+ "eval_precision": 0.7032196365954734,
+ "eval_precision_macro": 0.8245394656269969,
+ "eval_pred_class_0": 16531,
+ "eval_pred_class_1": 3137,
+ "eval_predicted_binding_ratio": 0.15949766117551353,
+ "eval_recall": 0.7113834247017091,
+ "eval_recall_macro": 0.8275936861541986,
+ "eval_runtime": 0.2653,
+ "eval_samples_per_second": 614.378,
+ "eval_steps_per_second": 3.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2988
+ },
+ {
+ "epoch": 166.66666666666666,
+ "grad_norm": 14056.4111328125,
+ "learning_rate": 8.432618494003656e-07,
+ "loss": 0.2279,
+ "step": 3000
+ },
+ {
+ "epoch": 167.0,
+ "eval_accuracy": 0.9072605247101891,
+ "eval_auc": 0.9373718655684177,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7077859660365268,
+ "eval_f1_macro": 0.8263351175441593,
+ "eval_loss": 0.25298377871513367,
+ "eval_pr_auc": 0.7140728736694715,
+ "eval_precision": 0.7032792104425343,
+ "eval_precision_macro": 0.8246534613355044,
+ "eval_pred_class_0": 16527,
+ "eval_pred_class_1": 3141,
+ "eval_predicted_binding_ratio": 0.15970103721781573,
+ "eval_recall": 0.7123508545630441,
+ "eval_recall_macro": 0.8280472206056,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.943,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3006
+ },
+ {
+ "epoch": 168.0,
+ "eval_accuracy": 0.9075655887736425,
+ "eval_auc": 0.9374749229998747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7094918504314478,
+ "eval_f1_macro": 0.8272654245808608,
+ "eval_loss": 0.25285741686820984,
+ "eval_pr_auc": 0.7143683695359583,
+ "eval_precision": 0.7031992397846056,
+ "eval_precision_macro": 0.8249204363177162,
+ "eval_pred_class_0": 16511,
+ "eval_pred_class_1": 3157,
+ "eval_predicted_binding_ratio": 0.16051454138702462,
+ "eval_recall": 0.7158980973879394,
+ "eval_recall_macro": 0.8296699396217176,
+ "eval_runtime": 0.2383,
+ "eval_samples_per_second": 683.908,
+ "eval_steps_per_second": 4.196,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3024
+ },
+ {
+ "epoch": 169.0,
+ "eval_accuracy": 0.9076672767947935,
+ "eval_auc": 0.9377269168628721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7087876844130853,
+ "eval_f1_macro": 0.8269618180373584,
+ "eval_loss": 0.25237372517585754,
+ "eval_pr_auc": 0.7154735567799367,
+ "eval_precision": 0.7049441786283892,
+ "eval_precision_macro": 0.8255259815297635,
+ "eval_pred_class_0": 16533,
+ "eval_pred_class_1": 3135,
+ "eval_predicted_binding_ratio": 0.1593959731543624,
+ "eval_recall": 0.7126733311834892,
+ "eval_recall_macro": 0.8284197222706846,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.364,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3042
+ },
+ {
+ "epoch": 170.0,
+ "eval_accuracy": 0.9079214968476713,
+ "eval_auc": 0.9378333125414714,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7099151049175076,
+ "eval_f1_macro": 0.8275952704051472,
+ "eval_loss": 0.2522483766078949,
+ "eval_pr_auc": 0.7157732693940176,
+ "eval_precision": 0.7052832590706556,
+ "eval_precision_macro": 0.8258656401852128,
+ "eval_pred_class_0": 16526,
+ "eval_pred_class_1": 3142,
+ "eval_predicted_binding_ratio": 0.1597518812283913,
+ "eval_recall": 0.7146081909061593,
+ "eval_recall_macro": 0.8293569716527538,
+ "eval_runtime": 0.2212,
+ "eval_samples_per_second": 736.855,
+ "eval_steps_per_second": 4.521,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3060
+ },
+ {
+ "epoch": 171.0,
+ "eval_accuracy": 0.9079214968476713,
+ "eval_auc": 0.9379969158489405,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7098221438871976,
+ "eval_f1_macro": 0.8275504434498686,
+ "eval_loss": 0.25200438499450684,
+ "eval_pr_auc": 0.7163721445757975,
+ "eval_precision": 0.7054140127388535,
+ "eval_precision_macro": 0.8259040054013725,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7142857142857143,
+ "eval_recall_macro": 0.8292259138217972,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.089,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3078
+ },
+ {
+ "epoch": 172.0,
+ "eval_accuracy": 0.9083282489322758,
+ "eval_auc": 0.9382153715205318,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7102683593122289,
+ "eval_f1_macro": 0.8279092226905722,
+ "eval_loss": 0.25164341926574707,
+ "eval_pr_auc": 0.7173810220120935,
+ "eval_precision": 0.7078795643818065,
+ "eval_precision_macro": 0.8270148456503497,
+ "eval_pred_class_0": 16546,
+ "eval_pred_class_1": 3122,
+ "eval_predicted_binding_ratio": 0.1587350010168802,
+ "eval_recall": 0.7126733311834892,
+ "eval_recall_macro": 0.8288120685011429,
+ "eval_runtime": 0.2387,
+ "eval_samples_per_second": 682.942,
+ "eval_steps_per_second": 4.19,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3096
+ },
+ {
+ "epoch": 173.0,
+ "eval_accuracy": 0.9084807809640024,
+ "eval_auc": 0.9383315970230777,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.711168164313222,
+ "eval_f1_macro": 0.8283970352740591,
+ "eval_loss": 0.2515103816986084,
+ "eval_pr_auc": 0.7177908159595219,
+ "eval_precision": 0.7077610986905142,
+ "eval_precision_macro": 0.8271223707155178,
+ "eval_pred_class_0": 16537,
+ "eval_pred_class_1": 3131,
+ "eval_predicted_binding_ratio": 0.15919259711206019,
+ "eval_recall": 0.7146081909061593,
+ "eval_recall_macro": 0.8296889569246799,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.31,
+ "eval_steps_per_second": 3.824,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3114
+ },
+ {
+ "epoch": 174.0,
+ "eval_accuracy": 0.9088366890380313,
+ "eval_auc": 0.9384487568455233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7133493205435651,
+ "eval_f1_macro": 0.8295745121505045,
+ "eval_loss": 0.25142860412597656,
+ "eval_pr_auc": 0.7181535155231535,
+ "eval_precision": 0.7073557387444515,
+ "eval_precision_macro": 0.8273365831908039,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7194453402128346,
+ "eval_recall_macro": 0.8318660877438894,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.694,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3132
+ },
+ {
+ "epoch": 175.0,
+ "eval_accuracy": 0.9090400650803335,
+ "eval_auc": 0.9386484385266265,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7135308246597278,
+ "eval_f1_macro": 0.8297338931856857,
+ "eval_loss": 0.2510823905467987,
+ "eval_pr_auc": 0.7190368898285651,
+ "eval_precision": 0.7086513994910941,
+ "eval_precision_macro": 0.8279095777411898,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7184779103514995,
+ "eval_recall_macro": 0.8315936361680839,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 616.851,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3150
+ },
+ {
+ "epoch": 176.0,
+ "eval_accuracy": 0.9090909090909091,
+ "eval_auc": 0.9388042461024309,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7136450992953235,
+ "eval_f1_macro": 0.8298069567551197,
+ "eval_loss": 0.25084683299064636,
+ "eval_pr_auc": 0.7196986972872739,
+ "eval_precision": 0.7088768692332167,
+ "eval_precision_macro": 0.8280239111672891,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7184779103514995,
+ "eval_recall_macro": 0.83162381664735,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.711,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3168
+ },
+ {
+ "epoch": 177.0,
+ "eval_accuracy": 0.9093451291437868,
+ "eval_auc": 0.9389100189010969,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7144001281435207,
+ "eval_f1_macro": 0.8302608322100373,
+ "eval_loss": 0.25058600306510925,
+ "eval_pr_auc": 0.7201232901620662,
+ "eval_precision": 0.7097390197326544,
+ "eval_precision_macro": 0.8285170954889824,
+ "eval_pred_class_0": 16526,
+ "eval_pred_class_1": 3142,
+ "eval_predicted_binding_ratio": 0.1597518812283913,
+ "eval_recall": 0.7191228635923895,
+ "eval_recall_macro": 0.832036834705593,
+ "eval_runtime": 0.2429,
+ "eval_samples_per_second": 671.088,
+ "eval_steps_per_second": 4.117,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3186
+ },
+ {
+ "epoch": 178.0,
+ "eval_accuracy": 0.9091417531014846,
+ "eval_auc": 0.9390935349014322,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.713392141138733,
+ "eval_f1_macro": 0.8297029283682245,
+ "eval_loss": 0.2502758800983429,
+ "eval_pr_auc": 0.7210120001490467,
+ "eval_precision": 0.7096362476068921,
+ "eval_precision_macro": 0.8282970157836081,
+ "eval_pred_class_0": 16534,
+ "eval_pred_class_1": 3134,
+ "eval_predicted_binding_ratio": 0.15934512914378687,
+ "eval_recall": 0.7171880038697195,
+ "eval_recall_macro": 0.83112976580279,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.763,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3204
+ },
+ {
+ "epoch": 179.0,
+ "eval_accuracy": 0.9097518812283913,
+ "eval_auc": 0.939160767004228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7162270183852918,
+ "eval_f1_macro": 0.8312854205617097,
+ "eval_loss": 0.2502012550830841,
+ "eval_pr_auc": 0.7211443774602971,
+ "eval_precision": 0.7102092580849715,
+ "eval_precision_macro": 0.8290358389250096,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7223476297968398,
+ "eval_recall_macro": 0.8335888568492861,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.383,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3222
+ },
+ {
+ "epoch": 180.0,
+ "eval_accuracy": 0.9099552572706935,
+ "eval_auc": 0.9392979952395233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7171378374061651,
+ "eval_f1_macro": 0.8317964319305957,
+ "eval_loss": 0.2500424385070801,
+ "eval_pr_auc": 0.7216701546304439,
+ "eval_precision": 0.7104430379746836,
+ "eval_precision_macro": 0.8292946956289701,
+ "eval_pred_class_0": 16508,
+ "eval_pred_class_1": 3160,
+ "eval_predicted_binding_ratio": 0.16066707341875128,
+ "eval_recall": 0.7239600128990649,
+ "eval_recall_macro": 0.8343648679211326,
+ "eval_runtime": 0.2706,
+ "eval_samples_per_second": 602.263,
+ "eval_steps_per_second": 3.695,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3240
+ },
+ {
+ "epoch": 181.0,
+ "eval_accuracy": 0.910006101281269,
+ "eval_auc": 0.9394494621207929,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7173427020121367,
+ "eval_f1_macro": 0.8319131723763289,
+ "eval_loss": 0.24980410933494568,
+ "eval_pr_auc": 0.722382187322872,
+ "eval_precision": 0.7105346409364125,
+ "eval_precision_macro": 0.8293692166334694,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7242824895195098,
+ "eval_recall_macro": 0.8345261062313551,
+ "eval_runtime": 0.2696,
+ "eval_samples_per_second": 604.516,
+ "eval_steps_per_second": 3.709,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3258
+ },
+ {
+ "epoch": 182.0,
+ "eval_accuracy": 0.9102094773235713,
+ "eval_auc": 0.9395796926893379,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7179814755669115,
+ "eval_f1_macro": 0.8322930296138966,
+ "eval_loss": 0.2495713084936142,
+ "eval_pr_auc": 0.7229232947500771,
+ "eval_precision": 0.7111673521037646,
+ "eval_precision_macro": 0.8297461525769929,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.8349089438103321,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.403,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3276
+ },
+ {
+ "epoch": 183.0,
+ "eval_accuracy": 0.910362009355298,
+ "eval_auc": 0.9396445500623882,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7187749242303397,
+ "eval_f1_macro": 0.8327294647159501,
+ "eval_loss": 0.2494269460439682,
+ "eval_pr_auc": 0.7232071190390562,
+ "eval_precision": 0.7111742424242424,
+ "eval_precision_macro": 0.8298901515151516,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.726539825862625,
+ "eval_recall_macro": 0.8356547744029127,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 643.015,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3294
+ },
+ {
+ "epoch": 184.0,
+ "eval_accuracy": 0.9106670734187513,
+ "eval_auc": 0.9397990826808293,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7191945021575835,
+ "eval_f1_macro": 0.8330396163256251,
+ "eval_loss": 0.24917152523994446,
+ "eval_pr_auc": 0.7237798760580164,
+ "eval_precision": 0.7129277566539924,
+ "eval_precision_macro": 0.830694740730097,
+ "eval_pred_class_0": 16512,
+ "eval_pred_class_1": 3156,
+ "eval_predicted_binding_ratio": 0.16046369737644905,
+ "eval_recall": 0.7255723960012899,
+ "eval_recall_macro": 0.8354426837856392,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.837,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3312
+ },
+ {
+ "epoch": 185.0,
+ "eval_accuracy": 0.9110738255033557,
+ "eval_auc": 0.9399604475135382,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7199359487590072,
+ "eval_f1_macro": 0.8335408491792982,
+ "eval_loss": 0.24888525903224945,
+ "eval_pr_auc": 0.7244599264333298,
+ "eval_precision": 0.7150127226463104,
+ "eval_precision_macro": 0.8316954196625403,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.8354220119578544,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.342,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3330
+ },
+ {
+ "epoch": 186.0,
+ "eval_accuracy": 0.9109721374822046,
+ "eval_auc": 0.9401063571379032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7194359878224643,
+ "eval_f1_macro": 0.8332638467590944,
+ "eval_loss": 0.2486649453639984,
+ "eval_pr_auc": 0.7249907062273525,
+ "eval_precision": 0.714968152866242,
+ "eval_precision_macro": 0.8315886262879129,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7239600128990649,
+ "eval_recall_macro": 0.8349684775064528,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.8,
+ "eval_steps_per_second": 3.944,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3348
+ },
+ {
+ "epoch": 187.0,
+ "eval_accuracy": 0.9112263575350824,
+ "eval_auc": 0.9402670406956853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7203715566944267,
+ "eval_f1_macro": 0.8338047799185901,
+ "eval_loss": 0.24845102429389954,
+ "eval_pr_auc": 0.7255585294747666,
+ "eval_precision": 0.7155583837098314,
+ "eval_precision_macro": 0.832000069313312,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7252499193808449,
+ "eval_recall_macro": 0.8356436112266088,
+ "eval_runtime": 0.2397,
+ "eval_samples_per_second": 680.081,
+ "eval_steps_per_second": 4.172,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3366
+ },
+ {
+ "epoch": 188.0,
+ "eval_accuracy": 0.9114805775879601,
+ "eval_auc": 0.9404174175370716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7215736446505677,
+ "eval_f1_macro": 0.8344742146415792,
+ "eval_loss": 0.24825866520404816,
+ "eval_pr_auc": 0.7262464197023197,
+ "eval_precision": 0.7157360406091371,
+ "eval_precision_macro": 0.8322867657635175,
+ "eval_pred_class_0": 16516,
+ "eval_pred_class_1": 3152,
+ "eval_predicted_binding_ratio": 0.16026032133414683,
+ "eval_recall": 0.72750725572396,
+ "eval_recall_macro": 0.8367119184396343,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.496,
+ "eval_steps_per_second": 3.856,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3384
+ },
+ {
+ "epoch": 189.0,
+ "eval_accuracy": 0.9114297335773845,
+ "eval_auc": 0.9404972726910148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7223461906279885,
+ "eval_f1_macro": 0.8348286515416876,
+ "eval_loss": 0.24819281697273254,
+ "eval_pr_auc": 0.7264794032375063,
+ "eval_precision": 0.7141506460762685,
+ "eval_precision_macro": 0.8317646228259488,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8379923162699333,
+ "eval_runtime": 0.2478,
+ "eval_samples_per_second": 657.696,
+ "eval_steps_per_second": 4.035,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3402
+ },
+ {
+ "epoch": 190.0,
+ "eval_accuracy": 0.9116839536302623,
+ "eval_auc": 0.940656301723974,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.722922316158877,
+ "eval_f1_macro": 0.8351963018783921,
+ "eval_loss": 0.24791164696216583,
+ "eval_pr_auc": 0.7271914277283732,
+ "eval_precision": 0.7152777777777778,
+ "eval_precision_macro": 0.8323358585858586,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8381432186662634,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.211,
+ "eval_steps_per_second": 3.866,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3420
+ },
+ {
+ "epoch": 191.0,
+ "eval_accuracy": 0.9118873296725646,
+ "eval_auc": 0.9407912914845091,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7233838786911413,
+ "eval_f1_macro": 0.8354907358742514,
+ "eval_loss": 0.24768619239330292,
+ "eval_pr_auc": 0.727892471269696,
+ "eval_precision": 0.7161820480404552,
+ "eval_precision_macro": 0.8327941262984632,
+ "eval_pred_class_0": 16504,
+ "eval_pred_class_1": 3164,
+ "eval_predicted_binding_ratio": 0.1608704494610535,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8382639405833274,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.424,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3438
+ },
+ {
+ "epoch": 192.0,
+ "eval_accuracy": 0.9119381736831401,
+ "eval_auc": 0.9409245683252279,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7240280433397068,
+ "eval_f1_macro": 0.8358192243316804,
+ "eval_loss": 0.2475385069847107,
+ "eval_pr_auc": 0.7283869598485145,
+ "eval_precision": 0.7155905511811024,
+ "eval_precision_macro": 0.832663401462133,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7326668816510803,
+ "eval_recall_macro": 0.8390804680483324,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.682,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3456
+ },
+ {
+ "epoch": 193.0,
+ "eval_accuracy": 0.9120398617042912,
+ "eval_auc": 0.9410766288889338,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7232885476647473,
+ "eval_f1_macro": 0.8354987049773379,
+ "eval_loss": 0.24721089005470276,
+ "eval_pr_auc": 0.7290279509427341,
+ "eval_precision": 0.7175499841320215,
+ "eval_precision_macro": 0.8333466455139735,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.837699192866343,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.255,
+ "eval_steps_per_second": 3.854,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3474
+ },
+ {
+ "epoch": 194.0,
+ "eval_accuracy": 0.9122432377465934,
+ "eval_auc": 0.9411616325348253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7239283429302623,
+ "eval_f1_macro": 0.8358790547924193,
+ "eval_loss": 0.2470986247062683,
+ "eval_pr_auc": 0.7293209780794321,
+ "eval_precision": 0.7181847032688036,
+ "eval_precision_macro": 0.8337245487646312,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7297645920670751,
+ "eval_recall_macro": 0.83808203044532,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.922,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3492
+ },
+ {
+ "epoch": 194.44444444444446,
+ "grad_norm": 15854.8017578125,
+ "learning_rate": 7.667662546617938e-07,
+ "loss": 0.2185,
+ "step": 3500
+ },
+ {
+ "epoch": 195.0,
+ "eval_accuracy": 0.9121923937360179,
+ "eval_auc": 0.9412192294636534,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7248685677871595,
+ "eval_f1_macro": 0.8363143165624445,
+ "eval_loss": 0.2470363825559616,
+ "eval_pr_auc": 0.7294473908430833,
+ "eval_precision": 0.7163098236775819,
+ "eval_precision_macro": 0.8331124670170592,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7336343115124153,
+ "eval_recall_macro": 0.839624543937532,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.796,
+ "eval_steps_per_second": 3.95,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3510
+ },
+ {
+ "epoch": 196.0,
+ "eval_accuracy": 0.9123449257677445,
+ "eval_auc": 0.9413068608842632,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7252151737328658,
+ "eval_f1_macro": 0.8365353589310388,
+ "eval_loss": 0.2468923032283783,
+ "eval_pr_auc": 0.7297206006779651,
+ "eval_precision": 0.7169870784746297,
+ "eval_precision_macro": 0.8334556489675362,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7336343115124153,
+ "eval_recall_macro": 0.83971508537533,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.86,
+ "eval_steps_per_second": 3.741,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3528
+ },
+ {
+ "epoch": 197.0,
+ "eval_accuracy": 0.9124466137888957,
+ "eval_auc": 0.9415050729580239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7246562200191877,
+ "eval_f1_macro": 0.8363018721763311,
+ "eval_loss": 0.2465437948703766,
+ "eval_pr_auc": 0.7306168212570879,
+ "eval_precision": 0.7186806216301934,
+ "eval_precision_macro": 0.8340602623742853,
+ "eval_pred_class_0": 16515,
+ "eval_pred_class_1": 3153,
+ "eval_predicted_binding_ratio": 0.1603111653447224,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8385959258552536,
+ "eval_runtime": 0.2512,
+ "eval_samples_per_second": 648.769,
+ "eval_steps_per_second": 3.98,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3546
+ },
+ {
+ "epoch": 198.0,
+ "eval_accuracy": 0.9127008338417735,
+ "eval_auc": 0.9416286951597772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7254996003197443,
+ "eval_f1_macro": 0.836798347664482,
+ "eval_loss": 0.24635502696037292,
+ "eval_pr_auc": 0.7311574695224861,
+ "eval_precision": 0.7194039315155358,
+ "eval_precision_macro": 0.8345112185130059,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8391400017444531,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.086,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3564
+ },
+ {
+ "epoch": 199.0,
+ "eval_accuracy": 0.9126499898311979,
+ "eval_auc": 0.941704083096699,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7255591054313099,
+ "eval_f1_macro": 0.8368090605158727,
+ "eval_loss": 0.24623039364814758,
+ "eval_pr_auc": 0.7315041353273113,
+ "eval_precision": 0.7188983855650523,
+ "eval_precision_macro": 0.8343113891602595,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7323444050306352,
+ "eval_recall_macro": 0.8393719369271001,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.096,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3582
+ },
+ {
+ "epoch": 200.0,
+ "eval_accuracy": 0.912751677852349,
+ "eval_auc": 0.9417882886776758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7266645428480408,
+ "eval_f1_macro": 0.8373778882187448,
+ "eval_loss": 0.24614199995994568,
+ "eval_pr_auc": 0.7317905290059212,
+ "eval_precision": 0.7179729304375196,
+ "eval_precision_macro": 0.8341244192542944,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7355691712350855,
+ "eval_recall_macro": 0.8407428761951972,
+ "eval_runtime": 0.2558,
+ "eval_samples_per_second": 637.19,
+ "eval_steps_per_second": 3.909,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3600
+ },
+ {
+ "epoch": 201.0,
+ "eval_accuracy": 0.9129042098840756,
+ "eval_auc": 0.9419125142943645,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7274463007159905,
+ "eval_f1_macro": 0.837808654578745,
+ "eval_loss": 0.2459731251001358,
+ "eval_pr_auc": 0.7322365639924645,
+ "eval_precision": 0.717964824120603,
+ "eval_precision_macro": 0.8342614705412528,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7371815543373106,
+ "eval_recall_macro": 0.8414887067877777,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.607,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3618
+ },
+ {
+ "epoch": 202.0,
+ "eval_accuracy": 0.9131075859263779,
+ "eval_auc": 0.9421228725268236,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7264286857691692,
+ "eval_f1_macro": 0.8373900508237788,
+ "eval_loss": 0.2455640733242035,
+ "eval_pr_auc": 0.7332689412482398,
+ "eval_precision": 0.721233312142403,
+ "eval_precision_macro": 0.8354381062588301,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8393814455785812,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 643.062,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3636
+ },
+ {
+ "epoch": 203.0,
+ "eval_accuracy": 0.9132092739475289,
+ "eval_auc": 0.9422253168108461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7266613290632506,
+ "eval_f1_macro": 0.8375381529725912,
+ "eval_loss": 0.24535632133483887,
+ "eval_pr_auc": 0.7337329229212076,
+ "eval_precision": 0.7216921119592875,
+ "eval_precision_macro": 0.8356705536799585,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8394418065371132,
+ "eval_runtime": 0.2645,
+ "eval_samples_per_second": 616.363,
+ "eval_steps_per_second": 3.781,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3654
+ },
+ {
+ "epoch": 204.0,
+ "eval_accuracy": 0.9133109619686801,
+ "eval_auc": 0.9423872461284946,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7268066015061689,
+ "eval_f1_macro": 0.8376441226295008,
+ "eval_loss": 0.2451435625553131,
+ "eval_pr_auc": 0.7344416461934514,
+ "eval_precision": 0.7222929936305732,
+ "eval_precision_macro": 0.8359468356342605,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.8393711096646888,
+ "eval_runtime": 0.24,
+ "eval_samples_per_second": 679.026,
+ "eval_steps_per_second": 4.166,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3672
+ },
+ {
+ "epoch": 205.0,
+ "eval_accuracy": 0.9138194020744357,
+ "eval_auc": 0.9424586242758461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7293629251157592,
+ "eval_f1_macro": 0.8390563302747484,
+ "eval_loss": 0.24504327774047852,
+ "eval_pr_auc": 0.7346240551024029,
+ "eval_precision": 0.7223276407337128,
+ "eval_precision_macro": 0.8364152440915626,
+ "eval_pred_class_0": 16506,
+ "eval_pred_class_1": 3162,
+ "eval_predicted_binding_ratio": 0.16076876143990237,
+ "eval_recall": 0.7365366010964205,
+ "eval_recall_macro": 0.8417698397526527,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.36,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3690
+ },
+ {
+ "epoch": 206.0,
+ "eval_accuracy": 0.91376855806386,
+ "eval_auc": 0.9426215171108914,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.729073482428115,
+ "eval_f1_macro": 0.8388988164347613,
+ "eval_loss": 0.2448122650384903,
+ "eval_pr_auc": 0.7352376772544322,
+ "eval_precision": 0.7223805001582779,
+ "eval_precision_macro": 0.8363855980711433,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7358916478555305,
+ "eval_recall_macro": 0.8414775436114739,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.41,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3708
+ },
+ {
+ "epoch": 207.0,
+ "eval_accuracy": 0.9139210900955868,
+ "eval_auc": 0.9426929439207377,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7299409794225554,
+ "eval_f1_macro": 0.8393709493840633,
+ "eval_loss": 0.24476298689842224,
+ "eval_pr_auc": 0.7354485111055644,
+ "eval_precision": 0.7222222222222222,
+ "eval_precision_macro": 0.8364747474747475,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7378265075782006,
+ "eval_recall_macro": 0.8423544320350108,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.323,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3726
+ },
+ {
+ "epoch": 208.0,
+ "eval_accuracy": 0.9138702460850112,
+ "eval_auc": 0.9428956913390121,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.72896,
+ "eval_f1_macro": 0.8388800483588226,
+ "eval_loss": 0.24439764022827148,
+ "eval_pr_auc": 0.736435270049487,
+ "eval_precision": 0.723404255319149,
+ "eval_precision_macro": 0.8367914187788916,
+ "eval_pred_class_0": 16519,
+ "eval_pred_class_1": 3149,
+ "eval_predicted_binding_ratio": 0.16010778930242017,
+ "eval_recall": 0.7346017413737504,
+ "eval_recall_macro": 0.8410136732461799,
+ "eval_runtime": 0.2835,
+ "eval_samples_per_second": 574.921,
+ "eval_steps_per_second": 3.527,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3744
+ },
+ {
+ "epoch": 209.0,
+ "eval_accuracy": 0.9140736221273134,
+ "eval_auc": 0.9429205676063466,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.730805989168525,
+ "eval_f1_macro": 0.83984185960937,
+ "eval_loss": 0.24438706040382385,
+ "eval_pr_auc": 0.7364306271247294,
+ "eval_precision": 0.722064841045011,
+ "eval_precision_macro": 0.8365645289452815,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7397613673008707,
+ "eval_recall_macro": 0.8432313204585479,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.222,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3762
+ },
+ {
+ "epoch": 210.0,
+ "eval_accuracy": 0.9142769981696156,
+ "eval_auc": 0.9430619808161933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7309288222151292,
+ "eval_f1_macro": 0.8399730291904192,
+ "eval_loss": 0.2441486269235611,
+ "eval_pr_auc": 0.7370811402900591,
+ "eval_precision": 0.7235387045813586,
+ "eval_precision_macro": 0.8371980622222068,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7384714608190907,
+ "eval_recall_macro": 0.842827811051786,
+ "eval_runtime": 0.2675,
+ "eval_samples_per_second": 609.291,
+ "eval_steps_per_second": 3.738,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3780
+ },
+ {
+ "epoch": 211.0,
+ "eval_accuracy": 0.9146329062436445,
+ "eval_auc": 0.9431815542983806,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7325155328978812,
+ "eval_f1_macro": 0.8408637738901821,
+ "eval_loss": 0.24397221207618713,
+ "eval_pr_auc": 0.7374915342644908,
+ "eval_precision": 0.7238664987405542,
+ "eval_precision_macro": 0.8376184300639468,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7413737504030957,
+ "eval_recall_macro": 0.8442185948852564,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.425,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3798
+ },
+ {
+ "epoch": 212.0,
+ "eval_accuracy": 0.9144295302013423,
+ "eval_auc": 0.9432977798009264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7319636884854276,
+ "eval_f1_macro": 0.8405258137499286,
+ "eval_loss": 0.24377743899822235,
+ "eval_pr_auc": 0.7380125440447487,
+ "eval_precision": 0.7230962869729389,
+ "eval_precision_macro": 0.837200053735105,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7410512737826508,
+ "eval_recall_macro": 0.8439668151372359,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.044,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3816
+ },
+ {
+ "epoch": 213.0,
+ "eval_accuracy": 0.9144295302013423,
+ "eval_auc": 0.9434619086633391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7311072056239016,
+ "eval_f1_macro": 0.8401129643018078,
+ "eval_loss": 0.24348998069763184,
+ "eval_pr_auc": 0.7388402605739814,
+ "eval_precision": 0.7245091830272324,
+ "eval_precision_macro": 0.8376331499630408,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7378265075782006,
+ "eval_recall_macro": 0.8426562368276709,
+ "eval_runtime": 0.2496,
+ "eval_samples_per_second": 652.954,
+ "eval_steps_per_second": 4.006,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3834
+ },
+ {
+ "epoch": 214.0,
+ "eval_accuracy": 0.9147854382753712,
+ "eval_auc": 0.943601764673353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7310654685494223,
+ "eval_f1_macro": 0.8402185728440683,
+ "eval_loss": 0.24328412115573883,
+ "eval_pr_auc": 0.7395557790782865,
+ "eval_precision": 0.727563078888534,
+ "eval_precision_macro": 0.838897945080114,
+ "eval_pred_class_0": 16537,
+ "eval_pred_class_1": 3131,
+ "eval_predicted_binding_ratio": 0.15919259711206019,
+ "eval_recall": 0.7346017413737504,
+ "eval_recall_macro": 0.841556921872968,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.38,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3852
+ },
+ {
+ "epoch": 215.0,
+ "eval_accuracy": 0.9146837502542201,
+ "eval_auc": 0.9436924715636332,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7310897435897435,
+ "eval_f1_macro": 0.8401943762667112,
+ "eval_loss": 0.2431441992521286,
+ "eval_pr_auc": 0.7398761361047077,
+ "eval_precision": 0.7266645428480408,
+ "eval_precision_macro": 0.838527383046018,
+ "eval_pred_class_0": 16529,
+ "eval_pred_class_1": 3139,
+ "eval_predicted_binding_ratio": 0.15959934919666463,
+ "eval_recall": 0.7355691712350855,
+ "eval_recall_macro": 0.8418897344073055,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 631.075,
+ "eval_steps_per_second": 3.872,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3870
+ },
+ {
+ "epoch": 216.0,
+ "eval_accuracy": 0.9147345942647956,
+ "eval_auc": 0.9437266715649686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7332591060919358,
+ "eval_f1_macro": 0.8412581348487456,
+ "eval_loss": 0.24318096041679382,
+ "eval_pr_auc": 0.739819563727558,
+ "eval_precision": 0.7234777150031387,
+ "eval_precision_macro": 0.8375913025931845,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7433086101257659,
+ "eval_recall_macro": 0.8450653028295274,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.716,
+ "eval_steps_per_second": 3.71,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3888
+ },
+ {
+ "epoch": 217.0,
+ "eval_accuracy": 0.915039658328249,
+ "eval_auc": 0.9438657295100676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7336202773792444,
+ "eval_f1_macro": 0.8415401994826537,
+ "eval_loss": 0.24295340478420258,
+ "eval_pr_auc": 0.740473021691125,
+ "eval_precision": 0.7254098360655737,
+ "eval_precision_macro": 0.8384566154139702,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7420187036439858,
+ "eval_recall_macro": 0.8447221543812975,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.926,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3906
+ },
+ {
+ "epoch": 218.0,
+ "eval_accuracy": 0.9148362822859467,
+ "eval_auc": 0.9439684365715622,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7333227193122114,
+ "eval_f1_macro": 0.8413247993777817,
+ "eval_loss": 0.24282881617546082,
+ "eval_pr_auc": 0.7409189185674594,
+ "eval_precision": 0.7242138364779874,
+ "eval_precision_macro": 0.837907500480624,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7426636568848759,
+ "eval_recall_macro": 0.8448635481261465,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.434,
+ "eval_steps_per_second": 3.923,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3924
+ },
+ {
+ "epoch": 219.0,
+ "eval_accuracy": 0.9149888143176734,
+ "eval_auc": 0.9440560582596731,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7337579617834394,
+ "eval_f1_macro": 0.8415885646284089,
+ "eval_loss": 0.24268390238285065,
+ "eval_pr_auc": 0.7412204047828347,
+ "eval_precision": 0.724756212645486,
+ "eval_precision_macro": 0.8382104794199593,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.7429861335053208,
+ "eval_recall_macro": 0.845085147394901,
+ "eval_runtime": 0.2558,
+ "eval_samples_per_second": 637.151,
+ "eval_steps_per_second": 3.909,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3942
+ },
+ {
+ "epoch": 220.0,
+ "eval_accuracy": 0.9152430343705511,
+ "eval_auc": 0.9441587945186644,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7346808849275823,
+ "eval_f1_macro": 0.842123366857946,
+ "eval_loss": 0.24253520369529724,
+ "eval_pr_auc": 0.7415422610927452,
+ "eval_precision": 0.7253299811439347,
+ "eval_precision_macro": 0.8386142808788943,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7442760399871009,
+ "eval_recall_macro": 0.8457602811150571,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.818,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3960
+ },
+ {
+ "epoch": 221.0,
+ "eval_accuracy": 0.9153955664022778,
+ "eval_auc": 0.9442562168332251,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7354531001589825,
+ "eval_f1_macro": 0.8425495241156832,
+ "eval_loss": 0.2423904687166214,
+ "eval_pr_auc": 0.741902799087436,
+ "eval_precision": 0.7253057384760113,
+ "eval_precision_macro": 0.8387436514456639,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.745888423089326,
+ "eval_recall_macro": 0.8465061117076376,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.926,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3978
+ },
+ {
+ "epoch": 222.0,
+ "eval_accuracy": 0.9153955664022778,
+ "eval_auc": 0.944399226172901,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.734609250398724,
+ "eval_f1_macro": 0.8421428275824746,
+ "eval_loss": 0.24210123717784882,
+ "eval_pr_auc": 0.7426309034006747,
+ "eval_precision": 0.7267276743452193,
+ "eval_precision_macro": 0.8391805533372256,
+ "eval_pred_class_0": 16499,
+ "eval_pred_class_1": 3169,
+ "eval_predicted_binding_ratio": 0.16112466951393126,
+ "eval_recall": 0.7426636568848759,
+ "eval_recall_macro": 0.8451955333980726,
+ "eval_runtime": 0.2674,
+ "eval_samples_per_second": 609.607,
+ "eval_steps_per_second": 3.74,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3996
+ },
+ {
+ "epoch": 222.22222222222223,
+ "grad_norm": 16301.5107421875,
+ "learning_rate": 6.802697587657594e-07,
+ "loss": 0.211,
+ "step": 4000
+ },
+ {
+ "epoch": 223.0,
+ "eval_accuracy": 0.9156497864551556,
+ "eval_auc": 0.9445356174132858,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.735026353617633,
+ "eval_f1_macro": 0.8424338117294514,
+ "eval_loss": 0.2418563961982727,
+ "eval_pr_auc": 0.7432491183346004,
+ "eval_precision": 0.7281645569620253,
+ "eval_precision_macro": 0.8398516024451512,
+ "eval_pred_class_0": 16508,
+ "eval_pred_class_1": 3160,
+ "eval_predicted_binding_ratio": 0.16066707341875128,
+ "eval_recall": 0.7420187036439858,
+ "eval_recall_macro": 0.8450843201324897,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.886,
+ "eval_steps_per_second": 3.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4014
+ },
+ {
+ "epoch": 224.0,
+ "eval_accuracy": 0.9159040065080334,
+ "eval_auc": 0.9445885524751136,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.736624203821656,
+ "eval_f1_macro": 0.8432939508943711,
+ "eval_loss": 0.24180874228477478,
+ "eval_pr_auc": 0.7434863322076849,
+ "eval_precision": 0.7275872916011324,
+ "eval_precision_macro": 0.8398989281099847,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.745888423089326,
+ "eval_recall_macro": 0.8468079165002977,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.671,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4032
+ },
+ {
+ "epoch": 225.0,
+ "eval_accuracy": 0.9159548505186089,
+ "eval_auc": 0.9446536531606378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7374106433677522,
+ "eval_f1_macro": 0.8436909456056703,
+ "eval_loss": 0.24177636206150055,
+ "eval_pr_auc": 0.7437995583771988,
+ "eval_precision": 0.7266750156543519,
+ "eval_precision_macro": 0.8396638402297497,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7484682360528861,
+ "eval_recall_macro": 0.8478865596272157,
+ "eval_runtime": 0.2167,
+ "eval_samples_per_second": 752.113,
+ "eval_steps_per_second": 4.614,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4050
+ },
+ {
+ "epoch": 226.0,
+ "eval_accuracy": 0.9161582265609112,
+ "eval_auc": 0.9447719419529625,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.737629276054097,
+ "eval_f1_macro": 0.843868342907385,
+ "eval_loss": 0.24154822528362274,
+ "eval_pr_auc": 0.7443388353128296,
+ "eval_precision": 0.7280150753768844,
+ "eval_precision_macro": 0.8402572343640063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7475008061915511,
+ "eval_recall_macro": 0.8476141080514102,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.202,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4068
+ },
+ {
+ "epoch": 227.0,
+ "eval_accuracy": 0.9162599145820622,
+ "eval_auc": 0.9448749507219245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7381974248927039,
+ "eval_f1_macro": 0.8441781495775367,
+ "eval_loss": 0.2414349913597107,
+ "eval_pr_auc": 0.7447448668341484,
+ "eval_precision": 0.7278996865203762,
+ "eval_precision_macro": 0.840312265884293,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7487907126733312,
+ "eval_recall_macro": 0.8481987003337683,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.465,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4086
+ },
+ {
+ "epoch": 228.0,
+ "eval_accuracy": 0.9163107585926378,
+ "eval_auc": 0.9449481196490842,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.738314785373609,
+ "eval_f1_macro": 0.8442527143596241,
+ "eval_loss": 0.2413274347782135,
+ "eval_pr_auc": 0.7451543649133733,
+ "eval_precision": 0.7281279397930386,
+ "eval_precision_macro": 0.840427826926679,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7487907126733312,
+ "eval_recall_macro": 0.8482288808130343,
+ "eval_runtime": 0.2185,
+ "eval_samples_per_second": 746.149,
+ "eval_steps_per_second": 4.578,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4104
+ },
+ {
+ "epoch": 229.0,
+ "eval_accuracy": 0.9162599145820622,
+ "eval_auc": 0.9450982726429946,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7376135096383623,
+ "eval_f1_macro": 0.843896745442007,
+ "eval_loss": 0.2410273402929306,
+ "eval_pr_auc": 0.7459954701052622,
+ "eval_precision": 0.728904282115869,
+ "eval_precision_macro": 0.8406224054285385,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7465333763302161,
+ "eval_recall_macro": 0.8472812955170728,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.64,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4122
+ },
+ {
+ "epoch": 230.0,
+ "eval_accuracy": 0.9167683546878178,
+ "eval_auc": 0.9451833444163787,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7392066273697626,
+ "eval_f1_macro": 0.8448445490519523,
+ "eval_loss": 0.2408701479434967,
+ "eval_pr_auc": 0.74641099889946,
+ "eval_precision": 0.7304785894206549,
+ "eval_precision_macro": 0.8415611477299734,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7481457594324411,
+ "eval_recall_macro": 0.8482383894645154,
+ "eval_runtime": 0.2678,
+ "eval_samples_per_second": 608.615,
+ "eval_steps_per_second": 3.734,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4140
+ },
+ {
+ "epoch": 231.0,
+ "eval_accuracy": 0.9167175106772423,
+ "eval_auc": 0.9452793360535927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7393380012730745,
+ "eval_f1_macro": 0.8448898647294817,
+ "eval_loss": 0.24077175557613373,
+ "eval_pr_auc": 0.7467905435777061,
+ "eval_precision": 0.7298146402764687,
+ "eval_precision_macro": 0.8413101105537636,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7491131892937762,
+ "eval_recall_macro": 0.8486013824781189,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.884,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4158
+ },
+ {
+ "epoch": 232.0,
+ "eval_accuracy": 0.9163616026032133,
+ "eval_auc": 0.9453335850027798,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7391786903440621,
+ "eval_f1_macro": 0.8446869866386211,
+ "eval_loss": 0.24075280129909515,
+ "eval_pr_auc": 0.7469096614042753,
+ "eval_precision": 0.7270742358078602,
+ "eval_precision_macro": 0.840149923152381,
+ "eval_pred_class_0": 16462,
+ "eval_pred_class_1": 3206,
+ "eval_predicted_binding_ratio": 0.16300589790522677,
+ "eval_recall": 0.7516930022573364,
+ "eval_recall_macro": 0.8494385817709088,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.647,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4176
+ },
+ {
+ "epoch": 233.0,
+ "eval_accuracy": 0.9168191986983933,
+ "eval_auc": 0.9454244865430391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7405645417063115,
+ "eval_f1_macro": 0.845516906033295,
+ "eval_loss": 0.24061860144138336,
+ "eval_pr_auc": 0.7472467341999135,
+ "eval_precision": 0.7285491419656787,
+ "eval_precision_macro": 0.8410102813636934,
+ "eval_pred_class_0": 16463,
+ "eval_pred_class_1": 3205,
+ "eval_predicted_binding_ratio": 0.1629550538946512,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.8502344374081289,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.77,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4194
+ },
+ {
+ "epoch": 234.0,
+ "eval_accuracy": 0.9171242627618467,
+ "eval_auc": 0.9455514956544295,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7409408773045136,
+ "eval_f1_macro": 0.8458063544009555,
+ "eval_loss": 0.24034352600574493,
+ "eval_pr_auc": 0.7478642707879094,
+ "eval_precision": 0.7304920087746788,
+ "eval_precision_macro": 0.841880100399963,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7516930022573364,
+ "eval_recall_macro": 0.849891288959899,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.626,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4212
+ },
+ {
+ "epoch": 235.0,
+ "eval_accuracy": 0.9170734187512711,
+ "eval_auc": 0.945608099868364,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7413984461709212,
+ "eval_f1_macro": 0.8460087995182923,
+ "eval_loss": 0.24029456079006195,
+ "eval_pr_auc": 0.7479864003180418,
+ "eval_precision": 0.7292576419213974,
+ "eval_precision_macro": 0.8414542370705274,
+ "eval_pred_class_0": 16462,
+ "eval_pred_class_1": 3206,
+ "eval_predicted_binding_ratio": 0.16300589790522677,
+ "eval_recall": 0.7539503386004515,
+ "eval_recall_macro": 0.8507785132973285,
+ "eval_runtime": 0.2166,
+ "eval_samples_per_second": 752.582,
+ "eval_steps_per_second": 4.617,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4230
+ },
+ {
+ "epoch": 236.0,
+ "eval_accuracy": 0.9172767947935733,
+ "eval_auc": 0.9457173958316524,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7416229950770208,
+ "eval_f1_macro": 0.8461890816058248,
+ "eval_loss": 0.2400863915681839,
+ "eval_pr_auc": 0.7485735786505677,
+ "eval_precision": 0.7306007509386734,
+ "eval_precision_macro": 0.8420487970332027,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.850506061721523,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.11,
+ "eval_steps_per_second": 3.768,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4248
+ },
+ {
+ "epoch": 237.0,
+ "eval_accuracy": 0.9173784828147244,
+ "eval_auc": 0.9457852508143816,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7421044278685923,
+ "eval_f1_macro": 0.8464570875531852,
+ "eval_loss": 0.24001120030879974,
+ "eval_pr_auc": 0.7487297504117033,
+ "eval_precision": 0.730625,
+ "eval_precision_macro": 0.8421463596065095,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7539503386004515,
+ "eval_recall_macro": 0.8509595961729245,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.497,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4266
+ },
+ {
+ "epoch": 238.0,
+ "eval_accuracy": 0.9176835468781778,
+ "eval_auc": 0.9458918216779619,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7428934413212641,
+ "eval_f1_macro": 0.8469453737675663,
+ "eval_loss": 0.23980914056301117,
+ "eval_pr_auc": 0.7491659673680734,
+ "eval_precision": 0.7318523153942428,
+ "eval_precision_macro": 0.8427959974251447,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7542728152208965,
+ "eval_recall_macro": 0.8512717368794771,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.674,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4284
+ },
+ {
+ "epoch": 239.0,
+ "eval_accuracy": 0.9175818588570266,
+ "eval_auc": 0.9459814190633611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7427392477384542,
+ "eval_f1_macro": 0.8468350393376697,
+ "eval_loss": 0.2397017627954483,
+ "eval_pr_auc": 0.7495363660441035,
+ "eval_precision": 0.73125,
+ "eval_precision_macro": 0.8425195834345397,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8513424337519016,
+ "eval_runtime": 0.2693,
+ "eval_samples_per_second": 605.187,
+ "eval_steps_per_second": 3.713,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4302
+ },
+ {
+ "epoch": 240.0,
+ "eval_accuracy": 0.9177852348993288,
+ "eval_auc": 0.9460787245879343,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7425569176882661,
+ "eval_f1_macro": 0.846819224235148,
+ "eval_loss": 0.2394852489233017,
+ "eval_pr_auc": 0.7500368484248636,
+ "eval_precision": 0.7333333333333333,
+ "eval_precision_macro": 0.8433466763706938,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7520154788777813,
+ "eval_recall_macro": 0.8504146930213136,
+ "eval_runtime": 0.2663,
+ "eval_samples_per_second": 612.01,
+ "eval_steps_per_second": 3.755,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4320
+ },
+ {
+ "epoch": 241.0,
+ "eval_accuracy": 0.91788692292048,
+ "eval_auc": 0.9461055278900622,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7437728066000318,
+ "eval_f1_macro": 0.8474411515820368,
+ "eval_loss": 0.23946216702461243,
+ "eval_pr_auc": 0.7500126507936957,
+ "eval_precision": 0.7320424734540912,
+ "eval_precision_macro": 0.8430344761294506,
+ "eval_pred_class_0": 16466,
+ "eval_pred_class_1": 3202,
+ "eval_predicted_binding_ratio": 0.16280252186292454,
+ "eval_recall": 0.7558851983231216,
+ "eval_recall_macro": 0.8520477479513235,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.523,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4338
+ },
+ {
+ "epoch": 242.0,
+ "eval_accuracy": 0.9178360789099044,
+ "eval_auc": 0.9462651603379567,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7429207763283487,
+ "eval_f1_macro": 0.8470125818101653,
+ "eval_loss": 0.23918889462947845,
+ "eval_pr_auc": 0.7507506323790389,
+ "eval_precision": 0.7331240188383046,
+ "eval_precision_macro": 0.8433259480225619,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.8508380469934491,
+ "eval_runtime": 0.2685,
+ "eval_samples_per_second": 607.073,
+ "eval_steps_per_second": 3.724,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4356
+ },
+ {
+ "epoch": 243.0,
+ "eval_accuracy": 0.9181411429733577,
+ "eval_auc": 0.9463650352422546,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7440381558028617,
+ "eval_f1_macro": 0.847659094847506,
+ "eval_loss": 0.2390899360179901,
+ "eval_pr_auc": 0.7511682669955798,
+ "eval_precision": 0.7337723424270931,
+ "eval_precision_macro": 0.8437961778887089,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8516744190238277,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.141,
+ "eval_steps_per_second": 3.749,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4374
+ },
+ {
+ "epoch": 244.0,
+ "eval_accuracy": 0.918446207036811,
+ "eval_auc": 0.9465344196541042,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7436061381074168,
+ "eval_f1_macro": 0.8475588127054617,
+ "eval_loss": 0.2388136237859726,
+ "eval_pr_auc": 0.7520628478642977,
+ "eval_precision": 0.7372424722662441,
+ "eval_precision_macro": 0.8451548762954184,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.7500806191551113,
+ "eval_recall_macro": 0.8500206922660327,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.538,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4392
+ },
+ {
+ "epoch": 245.0,
+ "eval_accuracy": 0.9182936750050844,
+ "eval_auc": 0.946561008841255,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7443931923015746,
+ "eval_f1_macro": 0.8478842115097998,
+ "eval_loss": 0.2388090342283249,
+ "eval_pr_auc": 0.7520269916576209,
+ "eval_precision": 0.7344632768361582,
+ "eval_precision_macro": 0.844145847858681,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8517649604616258,
+ "eval_runtime": 0.2356,
+ "eval_samples_per_second": 691.824,
+ "eval_steps_per_second": 4.244,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4410
+ },
+ {
+ "epoch": 246.0,
+ "eval_accuracy": 0.9185478950579622,
+ "eval_auc": 0.9466103526109676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7456335344553826,
+ "eval_f1_macro": 0.8485719582198821,
+ "eval_loss": 0.23877908289432526,
+ "eval_pr_auc": 0.7522268586665612,
+ "eval_precision": 0.7344385361276197,
+ "eval_precision_macro": 0.844360910951309,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8529643255056077,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.409,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4428
+ },
+ {
+ "epoch": 247.0,
+ "eval_accuracy": 0.9181919869839333,
+ "eval_auc": 0.9466863828928207,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.74464370734804,
+ "eval_f1_macro": 0.847968894691123,
+ "eval_loss": 0.23864901065826416,
+ "eval_pr_auc": 0.7525479720158522,
+ "eval_precision": 0.733125,
+ "eval_precision_macro": 0.84363925491863,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7565301515640116,
+ "eval_recall_macro": 0.8524909464888326,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.776,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4446
+ },
+ {
+ "epoch": 248.0,
+ "eval_accuracy": 0.9184970510473867,
+ "eval_auc": 0.9467725739035849,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7454343338097507,
+ "eval_f1_macro": 0.8484579580910492,
+ "eval_loss": 0.23847386240959167,
+ "eval_pr_auc": 0.752960490937812,
+ "eval_precision": 0.7343554443053817,
+ "eval_precision_macro": 0.8442903982090288,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7568526281844566,
+ "eval_recall_macro": 0.8528030871953852,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.673,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4464
+ },
+ {
+ "epoch": 249.0,
+ "eval_accuracy": 0.9183953630262355,
+ "eval_auc": 0.9468334214870647,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7452785272179019,
+ "eval_f1_macro": 0.8483468464756075,
+ "eval_loss": 0.238382488489151,
+ "eval_pr_auc": 0.7531576423642267,
+ "eval_precision": 0.73375,
+ "eval_precision_macro": 0.8440124787466602,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8528737840678097,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.912,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4482
+ },
+ {
+ "epoch": 250.0,
+ "grad_norm": 32703.09375,
+ "learning_rate": 5.870150616070439e-07,
+ "loss": 0.2045,
+ "step": 4500
+ },
+ {
+ "epoch": 250.0,
+ "eval_accuracy": 0.9189546471425666,
+ "eval_auc": 0.9469757203543168,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7464206172446707,
+ "eval_f1_macro": 0.849095331315225,
+ "eval_loss": 0.23810486495494843,
+ "eval_pr_auc": 0.7538503066163141,
+ "eval_precision": 0.7365777080062794,
+ "eval_precision_macro": 0.8453864697284325,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7565301515640116,
+ "eval_recall_macro": 0.8529436536778228,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.492,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4500
+ },
+ {
+ "epoch": 251.0,
+ "eval_accuracy": 0.9192088671954444,
+ "eval_auc": 0.9470911964544428,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7465305471367044,
+ "eval_f1_macro": 0.8492382980338313,
+ "eval_loss": 0.23794293403625488,
+ "eval_pr_auc": 0.7543678751573928,
+ "eval_precision": 0.7386363636363636,
+ "eval_precision_macro": 0.8462575757575758,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8523082090884139,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.943,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4518
+ },
+ {
+ "epoch": 252.0,
+ "eval_accuracy": 0.9186495830791133,
+ "eval_auc": 0.9471146615094285,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7459510955859003,
+ "eval_f1_macro": 0.8487610069611806,
+ "eval_loss": 0.2380078136920929,
+ "eval_pr_auc": 0.7543445385135205,
+ "eval_precision": 0.7347513293712856,
+ "eval_precision_macro": 0.8445476639570896,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7574975814253466,
+ "eval_recall_macro": 0.8531557442950962,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.051,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4536
+ },
+ {
+ "epoch": 253.0,
+ "eval_accuracy": 0.9187004270896888,
+ "eval_auc": 0.9472153636761378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7461501825686617,
+ "eval_f1_macro": 0.8488749520465066,
+ "eval_loss": 0.23785638809204102,
+ "eval_pr_auc": 0.7549126832149435,
+ "eval_precision": 0.7348342714196373,
+ "eval_precision_macro": 0.8446181071730852,
+ "eval_pred_class_0": 16470,
+ "eval_pred_class_1": 3198,
+ "eval_predicted_binding_ratio": 0.16259914582062232,
+ "eval_recall": 0.7578200580457917,
+ "eval_recall_macro": 0.8533169826053187,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.46,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4554
+ },
+ {
+ "epoch": 254.0,
+ "eval_accuracy": 0.918903803131991,
+ "eval_auc": 0.9473291852514412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7465437788018433,
+ "eval_f1_macro": 0.849136671654349,
+ "eval_loss": 0.23767386376857758,
+ "eval_pr_auc": 0.7554825230801616,
+ "eval_precision": 0.7359022556390977,
+ "eval_precision_macro": 0.8451300547435596,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7574975814253466,
+ "eval_recall_macro": 0.8533066466914263,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.53,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4572
+ },
+ {
+ "epoch": 255.0,
+ "eval_accuracy": 0.9196156192800488,
+ "eval_auc": 0.9474168847995438,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7481280866656046,
+ "eval_f1_macro": 0.8501522492676461,
+ "eval_loss": 0.23749451339244843,
+ "eval_pr_auc": 0.7559063738482461,
+ "eval_precision": 0.739294710327456,
+ "eval_precision_macro": 0.8468181046180088,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8535981155701939,
+ "eval_runtime": 0.267,
+ "eval_samples_per_second": 610.581,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4590
+ },
+ {
+ "epoch": 256.0,
+ "eval_accuracy": 0.9195647752694732,
+ "eval_auc": 0.9474761751831905,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7480089200382287,
+ "eval_f1_macro": 0.8500768176935048,
+ "eval_loss": 0.23738548159599304,
+ "eval_pr_auc": 0.7561572732622138,
+ "eval_precision": 0.7390620081838212,
+ "eval_precision_macro": 0.8467003692001515,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.853567935090928,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.111,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4608
+ },
+ {
+ "epoch": 257.0,
+ "eval_accuracy": 0.919818995322351,
+ "eval_auc": 0.9475989409250355,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7486051331101546,
+ "eval_f1_macro": 0.8504541559450298,
+ "eval_loss": 0.23718814551830292,
+ "eval_pr_auc": 0.7567216824275462,
+ "eval_precision": 0.7402269861286255,
+ "eval_precision_macro": 0.8472897782243516,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.853718837487258,
+ "eval_runtime": 0.2144,
+ "eval_samples_per_second": 760.368,
+ "eval_steps_per_second": 4.665,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4626
+ },
+ {
+ "epoch": 258.0,
+ "eval_accuracy": 0.9193613992271711,
+ "eval_auc": 0.9475927315907009,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.748013981569749,
+ "eval_f1_macro": 0.8500072329009691,
+ "eval_loss": 0.23723167181015015,
+ "eval_pr_auc": 0.7565508642499409,
+ "eval_precision": 0.7372377074851237,
+ "eval_precision_macro": 0.845948140540741,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.8542335601596028,
+ "eval_runtime": 0.2406,
+ "eval_samples_per_second": 677.35,
+ "eval_steps_per_second": 4.156,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4644
+ },
+ {
+ "epoch": 259.0,
+ "eval_accuracy": 0.9197681513117755,
+ "eval_auc": 0.9476849762158163,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7486460656259956,
+ "eval_f1_macro": 0.8504558902151395,
+ "eval_loss": 0.23700466752052307,
+ "eval_pr_auc": 0.7570817989267699,
+ "eval_precision": 0.7396915328926661,
+ "eval_precision_macro": 0.8470757706910725,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7578200580457917,
+ "eval_recall_macro": 0.8539507726699049,
+ "eval_runtime": 0.2559,
+ "eval_samples_per_second": 636.906,
+ "eval_steps_per_second": 3.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4662
+ },
+ {
+ "epoch": 260.0,
+ "eval_accuracy": 0.9199715273540777,
+ "eval_auc": 0.9477206555569931,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495225970719287,
+ "eval_f1_macro": 0.8509503339952407,
+ "eval_loss": 0.23693729937076569,
+ "eval_pr_auc": 0.7572519889982183,
+ "eval_precision": 0.7398680490103676,
+ "eval_precision_macro": 0.8473073942352414,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7594324411480168,
+ "eval_recall_macro": 0.8547267837417515,
+ "eval_runtime": 0.2431,
+ "eval_samples_per_second": 670.398,
+ "eval_steps_per_second": 4.113,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4680
+ },
+ {
+ "epoch": 261.0,
+ "eval_accuracy": 0.919818995322351,
+ "eval_auc": 0.9477750407611654,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7492447129909365,
+ "eval_f1_macro": 0.8507623994645729,
+ "eval_loss": 0.23685960471630096,
+ "eval_pr_auc": 0.7574807332819814,
+ "eval_precision": 0.739021329987453,
+ "eval_precision_macro": 0.8469075096539207,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.85476730013491,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.06,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4698
+ },
+ {
+ "epoch": 262.0,
+ "eval_accuracy": 0.9200223713646533,
+ "eval_auc": 0.947870010485989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495621716287215,
+ "eval_f1_macro": 0.8509874086097018,
+ "eval_loss": 0.23665639758110046,
+ "eval_pr_auc": 0.7579557575566394,
+ "eval_precision": 0.740251572327044,
+ "eval_precision_macro": 0.8474729477355745,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.854625906390061,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.828,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4716
+ },
+ {
+ "epoch": 263.0,
+ "eval_accuracy": 0.9201749033963799,
+ "eval_auc": 0.9479619047411422,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7501591343093571,
+ "eval_f1_macro": 0.8513291133243506,
+ "eval_loss": 0.23653987050056458,
+ "eval_pr_auc": 0.7584013256784117,
+ "eval_precision": 0.7404963870562362,
+ "eval_precision_macro": 0.8476822244653337,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7600773943889068,
+ "eval_recall_macro": 0.8551096213207285,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 641.941,
+ "eval_steps_per_second": 3.938,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4734
+ },
+ {
+ "epoch": 264.0,
+ "eval_accuracy": 0.9203274354281066,
+ "eval_auc": 0.9480515799865329,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7501195981502152,
+ "eval_f1_macro": 0.8513640482812168,
+ "eval_loss": 0.2363332211971283,
+ "eval_pr_auc": 0.7588834286830018,
+ "eval_precision": 0.7419558359621451,
+ "eval_precision_macro": 0.848278196802748,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7584650112866818,
+ "eval_recall_macro": 0.8545448736037441,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.351,
+ "eval_steps_per_second": 3.953,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4752
+ },
+ {
+ "epoch": 265.0,
+ "eval_accuracy": 0.9203782794386821,
+ "eval_auc": 0.9481172938194913,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.750557502389296,
+ "eval_f1_macro": 0.8515931077800434,
+ "eval_loss": 0.23625436425209045,
+ "eval_pr_auc": 0.7591087523185005,
+ "eval_precision": 0.7415801070192005,
+ "eval_precision_macro": 0.8482019751638359,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.8550992854068361,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.903,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4770
+ },
+ {
+ "epoch": 266.0,
+ "eval_accuracy": 0.9200223713646533,
+ "eval_auc": 0.9482093632596256,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495621716287215,
+ "eval_f1_macro": 0.8509874086097018,
+ "eval_loss": 0.23614051938056946,
+ "eval_pr_auc": 0.7594934170637511,
+ "eval_precision": 0.740251572327044,
+ "eval_precision_macro": 0.8474729477355745,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.854625906390061,
+ "eval_runtime": 0.2555,
+ "eval_samples_per_second": 638.024,
+ "eval_steps_per_second": 3.914,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4788
+ },
+ {
+ "epoch": 267.0,
+ "eval_accuracy": 0.9204291234492576,
+ "eval_auc": 0.9483264257570818,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7504385265507894,
+ "eval_f1_macro": 0.8515537559413557,
+ "eval_loss": 0.23595084249973297,
+ "eval_pr_auc": 0.7600448623712702,
+ "eval_precision": 0.7422712933753943,
+ "eval_precision_macro": 0.8484662322132155,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7587874879071267,
+ "eval_recall_macro": 0.8547362923932326,
+ "eval_runtime": 0.2589,
+ "eval_samples_per_second": 629.531,
+ "eval_steps_per_second": 3.862,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4806
+ },
+ {
+ "epoch": 268.0,
+ "eval_accuracy": 0.9203782794386821,
+ "eval_auc": 0.9483670102777331,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7509541984732825,
+ "eval_f1_macro": 0.8517842887791249,
+ "eval_loss": 0.23594258725643158,
+ "eval_pr_auc": 0.7601171664174119,
+ "eval_precision": 0.740822089739567,
+ "eval_precision_macro": 0.8479609508220922,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7613673008706868,
+ "eval_recall_macro": 0.8557545745616185,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.883,
+ "eval_steps_per_second": 4.061,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4824
+ },
+ {
+ "epoch": 269.0,
+ "eval_accuracy": 0.920276591417531,
+ "eval_auc": 0.9484603741402287,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7505567928730512,
+ "eval_f1_macro": 0.8515567625484772,
+ "eval_loss": 0.23575998842716217,
+ "eval_pr_auc": 0.7605631058573062,
+ "eval_precision": 0.7406593406593407,
+ "eval_precision_macro": 0.8478216317444613,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7607223476297968,
+ "eval_recall_macro": 0.8554320979411736,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.928,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4842
+ },
+ {
+ "epoch": 270.0,
+ "eval_accuracy": 0.9206833435021354,
+ "eval_auc": 0.9485513243429828,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.751434034416826,
+ "eval_f1_macro": 0.8521235507837306,
+ "eval_loss": 0.235606849193573,
+ "eval_pr_auc": 0.7610077759721279,
+ "eval_precision": 0.7426771653543307,
+ "eval_precision_macro": 0.8488138752255192,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7603998710093518,
+ "eval_recall_macro": 0.8555424839443451,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.303,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4860
+ },
+ {
+ "epoch": 271.0,
+ "eval_accuracy": 0.9209884075655888,
+ "eval_auc": 0.948647082400222,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7519948930737312,
+ "eval_f1_macro": 0.8525018311755109,
+ "eval_loss": 0.23542079329490662,
+ "eval_pr_auc": 0.7614859215167992,
+ "eval_precision": 0.744391785150079,
+ "eval_precision_macro": 0.8496242389363071,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.8554614511580283,
+ "eval_runtime": 0.2456,
+ "eval_samples_per_second": 663.61,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4878
+ },
+ {
+ "epoch": 272.0,
+ "eval_accuracy": 0.920734187512711,
+ "eval_auc": 0.9487102755159504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7517120560598821,
+ "eval_f1_macro": 0.8522755458325244,
+ "eval_loss": 0.2353215366601944,
+ "eval_pr_auc": 0.761752604262035,
+ "eval_precision": 0.7426054122089364,
+ "eval_precision_macro": 0.8488345435817272,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8558347800855242,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.279,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4896
+ },
+ {
+ "epoch": 273.0,
+ "eval_accuracy": 0.920734187512711,
+ "eval_auc": 0.9487825490532058,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7517911160643209,
+ "eval_f1_macro": 0.8523136490925144,
+ "eval_loss": 0.23522616922855377,
+ "eval_pr_auc": 0.7620046741511783,
+ "eval_precision": 0.7424528301886792,
+ "eval_precision_macro": 0.8487858522607636,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7613673008706868,
+ "eval_recall_macro": 0.8559658379164806,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.657,
+ "eval_steps_per_second": 3.875,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4914
+ },
+ {
+ "epoch": 274.0,
+ "eval_accuracy": 0.9208358755338621,
+ "eval_auc": 0.9488776355680169,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7518725099601593,
+ "eval_f1_macro": 0.8523888728682258,
+ "eval_loss": 0.2350645512342453,
+ "eval_pr_auc": 0.7623918274747735,
+ "eval_precision": 0.7432262129804663,
+ "eval_precision_macro": 0.8491200787225601,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7607223476297968,
+ "eval_recall_macro": 0.8557640832130997,
+ "eval_runtime": 0.2427,
+ "eval_samples_per_second": 671.591,
+ "eval_steps_per_second": 4.12,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4932
+ },
+ {
+ "epoch": 275.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9489531597599242,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7525510204081632,
+ "eval_f1_macro": 0.8528058755561261,
+ "eval_loss": 0.2349635362625122,
+ "eval_pr_auc": 0.7627186958629152,
+ "eval_precision": 0.7442447177546515,
+ "eval_precision_macro": 0.8496637300357182,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8560460434403863,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.549,
+ "eval_steps_per_second": 3.838,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4950
+ },
+ {
+ "epoch": 276.0,
+ "eval_accuracy": 0.9207850315232866,
+ "eval_auc": 0.9489723619803666,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7524626628535113,
+ "eval_f1_macro": 0.8526552767085183,
+ "eval_loss": 0.2350020557641983,
+ "eval_pr_auc": 0.7627735453579831,
+ "eval_precision": 0.7416222987785781,
+ "eval_precision_macro": 0.8485653223786669,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8569134232124421,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.627,
+ "eval_steps_per_second": 3.93,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4968
+ },
+ {
+ "epoch": 277.0,
+ "eval_accuracy": 0.9209884075655888,
+ "eval_auc": 0.9490740568619694,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7524689391525964,
+ "eval_f1_macro": 0.8527303253449472,
+ "eval_loss": 0.2348015159368515,
+ "eval_pr_auc": 0.7632371897507115,
+ "eval_precision": 0.7434686811457349,
+ "eval_precision_macro": 0.8493281796365992,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7616897774911319,
+ "eval_recall_macro": 0.8562477981437672,
+ "eval_runtime": 0.2476,
+ "eval_samples_per_second": 658.287,
+ "eval_steps_per_second": 4.039,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4986
+ },
+ {
+ "epoch": 277.77777777777777,
+ "grad_norm": 14799.8212890625,
+ "learning_rate": 4.904982238472025e-07,
+ "loss": 0.199,
+ "step": 5000
+ },
+ {
+ "epoch": 278.0,
+ "eval_accuracy": 0.9207850315232866,
+ "eval_auc": 0.9491519363186243,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7514358647096363,
+ "eval_f1_macro": 0.8521604145127957,
+ "eval_loss": 0.23470228910446167,
+ "eval_pr_auc": 0.7636185597213633,
+ "eval_precision": 0.7436059362172402,
+ "eval_precision_macro": 0.8491982774838095,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.7594324411480168,
+ "eval_recall_macro": 0.8552096714100077,
+ "eval_runtime": 0.2561,
+ "eval_samples_per_second": 636.482,
+ "eval_steps_per_second": 3.905,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5004
+ },
+ {
+ "epoch": 279.0,
+ "eval_accuracy": 0.9209375635550132,
+ "eval_auc": 0.9492209202712323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7521912350597609,
+ "eval_f1_macro": 0.8525784825369885,
+ "eval_loss": 0.2345963418483734,
+ "eval_pr_auc": 0.7638862490185815,
+ "eval_precision": 0.7435412728418399,
+ "eval_precision_macro": 0.8493079227068421,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8559555020025882,
+ "eval_runtime": 0.273,
+ "eval_samples_per_second": 597.168,
+ "eval_steps_per_second": 3.664,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5022
+ },
+ {
+ "epoch": 280.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9492453877736104,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7532591414944356,
+ "eval_f1_macro": 0.8531471523002045,
+ "eval_loss": 0.23453067243099213,
+ "eval_pr_auc": 0.7639137465976396,
+ "eval_precision": 0.7428661022264033,
+ "eval_precision_macro": 0.8492229655497572,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7639471138342471,
+ "eval_recall_macro": 0.8572255639189947,
+ "eval_runtime": 0.2247,
+ "eval_samples_per_second": 725.266,
+ "eval_steps_per_second": 4.449,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5040
+ },
+ {
+ "epoch": 281.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9493002596027307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7531806615776081,
+ "eval_f1_macro": 0.8531093334515976,
+ "eval_loss": 0.2344331294298172,
+ "eval_pr_auc": 0.7641447446055818,
+ "eval_precision": 0.7430185127078758,
+ "eval_precision_macro": 0.8492715280607518,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8570945060880382,
+ "eval_runtime": 0.263,
+ "eval_samples_per_second": 619.693,
+ "eval_steps_per_second": 3.802,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5058
+ },
+ {
+ "epoch": 282.0,
+ "eval_accuracy": 0.9212426276184665,
+ "eval_auc": 0.9493485327975579,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7538534880025425,
+ "eval_f1_macro": 0.8534875889608693,
+ "eval_loss": 0.2344052791595459,
+ "eval_pr_auc": 0.764296468123137,
+ "eval_precision": 0.7431077694235589,
+ "eval_precision_macro": 0.849430796583593,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8577092788496623,
+ "eval_runtime": 0.2484,
+ "eval_samples_per_second": 656.097,
+ "eval_steps_per_second": 4.025,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5076
+ },
+ {
+ "epoch": 283.0,
+ "eval_accuracy": 0.9212426276184665,
+ "eval_auc": 0.9494190544850012,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7536969311496263,
+ "eval_f1_macro": 0.8534121476034391,
+ "eval_loss": 0.23425185680389404,
+ "eval_pr_auc": 0.7646652388104445,
+ "eval_precision": 0.7434127979924717,
+ "eval_precision_macro": 0.8495280009379834,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8574471631877492,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.881,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5094
+ },
+ {
+ "epoch": 284.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9495173137944721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7539000318369946,
+ "eval_f1_macro": 0.853564041452472,
+ "eval_loss": 0.2340681403875351,
+ "eval_pr_auc": 0.7651886756989377,
+ "eval_precision": 0.744419993712669,
+ "eval_precision_macro": 0.8499803613859639,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8572755889636343,
+ "eval_runtime": 0.2477,
+ "eval_samples_per_second": 658.168,
+ "eval_steps_per_second": 4.038,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5112
+ },
+ {
+ "epoch": 285.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9495497619459952,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7540566337893733,
+ "eval_f1_macro": 0.8536395120535369,
+ "eval_loss": 0.234034925699234,
+ "eval_pr_auc": 0.7653794962608654,
+ "eval_precision": 0.7441130298273155,
+ "eval_precision_macro": 0.8498821534503319,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8575377046255472,
+ "eval_runtime": 0.2024,
+ "eval_samples_per_second": 805.487,
+ "eval_steps_per_second": 4.942,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5130
+ },
+ {
+ "epoch": 286.0,
+ "eval_accuracy": 0.9215476916819199,
+ "eval_auc": 0.9496228140831674,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.754181934044926,
+ "eval_f1_macro": 0.8537539029854382,
+ "eval_loss": 0.23388919234275818,
+ "eval_pr_auc": 0.7657504791267543,
+ "eval_precision": 0.7452770780856424,
+ "eval_precision_macro": 0.8503853253634615,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.763302160593357,
+ "eval_recall_macro": 0.8572350725704758,
+ "eval_runtime": 0.1796,
+ "eval_samples_per_second": 907.381,
+ "eval_steps_per_second": 5.567,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5148
+ },
+ {
+ "epoch": 287.0,
+ "eval_accuracy": 0.9219544437665244,
+ "eval_auc": 0.9497348546111616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7550662198819211,
+ "eval_f1_macro": 0.8543240621923138,
+ "eval_loss": 0.2337103933095932,
+ "eval_pr_auc": 0.7663000808208629,
+ "eval_precision": 0.7473152242577384,
+ "eval_precision_macro": 0.8513875842534602,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7629796839729119,
+ "eval_recall_macro": 0.8573454585736473,
+ "eval_runtime": 0.2659,
+ "eval_samples_per_second": 613.107,
+ "eval_steps_per_second": 3.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5166
+ },
+ {
+ "epoch": 288.0,
+ "eval_accuracy": 0.9218527557453732,
+ "eval_auc": 0.9498016974139991,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7548253309937789,
+ "eval_f1_macro": 0.8541719723587154,
+ "eval_loss": 0.23359474539756775,
+ "eval_pr_auc": 0.7666273574525592,
+ "eval_precision": 0.7468434343434344,
+ "eval_precision_macro": 0.8511489898989899,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7629796839729119,
+ "eval_recall_macro": 0.8572850976151154,
+ "eval_runtime": 0.2686,
+ "eval_samples_per_second": 606.819,
+ "eval_steps_per_second": 3.723,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5184
+ },
+ {
+ "epoch": 289.0,
+ "eval_accuracy": 0.9216493797030709,
+ "eval_auc": 0.9498063690134986,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7546569017672345,
+ "eval_f1_macro": 0.8540188154275592,
+ "eval_loss": 0.23359988629817963,
+ "eval_pr_auc": 0.7666725529217715,
+ "eval_precision": 0.7452830188679245,
+ "eval_precision_macro": 0.8504738723645784,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8576886070218773,
+ "eval_runtime": 0.2712,
+ "eval_samples_per_second": 600.959,
+ "eval_steps_per_second": 3.687,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5202
+ },
+ {
+ "epoch": 290.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9498248412965191,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7543692405465523,
+ "eval_f1_macro": 0.8537901526260393,
+ "eval_loss": 0.23362942039966583,
+ "eval_pr_auc": 0.7666566917414745,
+ "eval_precision": 0.7435014093329158,
+ "eval_precision_macro": 0.8496869717377781,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7655594969364721,
+ "eval_recall_macro": 0.8580619359493733,
+ "eval_runtime": 0.2158,
+ "eval_samples_per_second": 755.492,
+ "eval_steps_per_second": 4.635,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5220
+ },
+ {
+ "epoch": 291.0,
+ "eval_accuracy": 0.9219544437665244,
+ "eval_auc": 0.9499087646350263,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7554564282300462,
+ "eval_f1_macro": 0.8545121458733945,
+ "eval_loss": 0.23345860838890076,
+ "eval_pr_auc": 0.767032300253484,
+ "eval_precision": 0.746536523929471,
+ "eval_precision_macro": 0.8511363192046093,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8580007477284299,
+ "eval_runtime": 0.2582,
+ "eval_samples_per_second": 631.391,
+ "eval_steps_per_second": 3.874,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5238
+ },
+ {
+ "epoch": 292.0,
+ "eval_accuracy": 0.9226154159040065,
+ "eval_auc": 0.9500412726083274,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7567135549872123,
+ "eval_f1_macro": 0.8553519407342349,
+ "eval_loss": 0.23320625722408295,
+ "eval_pr_auc": 0.7678050059622479,
+ "eval_precision": 0.7502377179080824,
+ "eval_precision_macro": 0.8528939452496871,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.763302160593357,
+ "eval_recall_macro": 0.857868862635062,
+ "eval_runtime": 0.2022,
+ "eval_samples_per_second": 806.124,
+ "eval_steps_per_second": 4.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5256
+ },
+ {
+ "epoch": 293.0,
+ "eval_accuracy": 0.9219035997559487,
+ "eval_auc": 0.9500337688516315,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7551801083838062,
+ "eval_f1_macro": 0.8543609694420392,
+ "eval_loss": 0.23323103785514832,
+ "eval_pr_auc": 0.7676369094541509,
+ "eval_precision": 0.7466120390797353,
+ "eval_precision_macro": 0.8511174775574487,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7639471138342471,
+ "eval_recall_macro": 0.8577084515872508,
+ "eval_runtime": 0.2457,
+ "eval_samples_per_second": 663.373,
+ "eval_steps_per_second": 4.07,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5274
+ },
+ {
+ "epoch": 294.0,
+ "eval_accuracy": 0.9219035997559487,
+ "eval_auc": 0.9500722803500048,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7554140127388536,
+ "eval_f1_macro": 0.8544737053045671,
+ "eval_loss": 0.23318050801753998,
+ "eval_pr_auc": 0.7677669872107012,
+ "eval_precision": 0.7461465869770368,
+ "eval_precision_macro": 0.8509676473001504,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8581016250801203,
+ "eval_runtime": 0.2622,
+ "eval_samples_per_second": 621.765,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5292
+ },
+ {
+ "epoch": 295.0,
+ "eval_accuracy": 0.9223103518405532,
+ "eval_auc": 0.9501381693679445,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7562998405103668,
+ "eval_f1_macro": 0.8550446157127531,
+ "eval_loss": 0.23305083811283112,
+ "eval_pr_auc": 0.7681080957655504,
+ "eval_precision": 0.7481855474913222,
+ "eval_precision_macro": 0.8519702208636682,
+ "eval_pred_class_0": 16499,
+ "eval_pred_class_1": 3169,
+ "eval_predicted_binding_ratio": 0.16112466951393126,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8582120110832919,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.885,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5310
+ },
+ {
+ "epoch": 296.0,
+ "eval_accuracy": 0.9220052877770999,
+ "eval_auc": 0.9501577511558462,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7556546670914304,
+ "eval_f1_macro": 0.8546256879531203,
+ "eval_loss": 0.23304298520088196,
+ "eval_pr_auc": 0.7681639014007426,
+ "eval_precision": 0.7466163046899591,
+ "eval_precision_macro": 0.8512051870912047,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8581619860386525,
+ "eval_runtime": 0.2233,
+ "eval_samples_per_second": 730.092,
+ "eval_steps_per_second": 4.479,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5328
+ },
+ {
+ "epoch": 297.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9502301512155882,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7564981661616967,
+ "eval_f1_macro": 0.8551582014839937,
+ "eval_loss": 0.23290005326271057,
+ "eval_pr_auc": 0.7685493782671796,
+ "eval_precision": 0.7482649842271294,
+ "eval_precision_macro": 0.8520389050120978,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8583732493935144,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.312,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5346
+ },
+ {
+ "epoch": 298.0,
+ "eval_accuracy": 0.9224628838722798,
+ "eval_auc": 0.9503260649928105,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.756739511883873,
+ "eval_f1_macro": 0.8553105125875349,
+ "eval_loss": 0.23275841772556305,
+ "eval_pr_auc": 0.768979970846171,
+ "eval_precision": 0.7487373737373737,
+ "eval_precision_macro": 0.8522777777777777,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8584336103520465,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.056,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5364
+ },
+ {
+ "epoch": 299.0,
+ "eval_accuracy": 0.9224120398617043,
+ "eval_auc": 0.9503799246420391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7565411614550096,
+ "eval_f1_macro": 0.8551969143430849,
+ "eval_loss": 0.23264609277248383,
+ "eval_pr_auc": 0.7692428666566218,
+ "eval_precision": 0.748658035996211,
+ "eval_precision_macro": 0.8522091464751675,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8582723720418239,
+ "eval_runtime": 0.2384,
+ "eval_samples_per_second": 683.765,
+ "eval_steps_per_second": 4.195,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5382
+ },
+ {
+ "epoch": 300.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9503983969250598,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7565758010521282,
+ "eval_f1_macro": 0.8551956221484214,
+ "eval_loss": 0.23263320326805115,
+ "eval_pr_auc": 0.7693119480202146,
+ "eval_precision": 0.748108448928121,
+ "eval_precision_macro": 0.8519882690809373,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7652370203160271,
+ "eval_recall_macro": 0.8585043072244709,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.523,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5400
+ },
+ {
+ "epoch": 301.0,
+ "eval_accuracy": 0.9225137278828553,
+ "eval_auc": 0.9504205091626904,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7572475310608474,
+ "eval_f1_macro": 0.855573369257207,
+ "eval_loss": 0.2325783669948578,
+ "eval_pr_auc": 0.7694562109808358,
+ "eval_precision": 0.7481901164620711,
+ "eval_precision_macro": 0.8521436908185075,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.859119079986095,
+ "eval_runtime": 0.2413,
+ "eval_samples_per_second": 675.443,
+ "eval_steps_per_second": 4.144,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5418
+ },
+ {
+ "epoch": 302.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9504706120673215,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.756808408982322,
+ "eval_f1_macro": 0.8553077347570655,
+ "eval_loss": 0.23251411318778992,
+ "eval_pr_auc": 0.7697272239104135,
+ "eval_precision": 0.7476400251730648,
+ "eval_precision_macro": 0.8518369925744038,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7662044501773622,
+ "eval_recall_macro": 0.8588974807173404,
+ "eval_runtime": 0.2708,
+ "eval_samples_per_second": 602.022,
+ "eval_steps_per_second": 3.693,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5436
+ },
+ {
+ "epoch": 303.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9504959749596038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7571178622554477,
+ "eval_f1_macro": 0.8554568705510044,
+ "eval_loss": 0.23248492181301117,
+ "eval_pr_auc": 0.7698215821647963,
+ "eval_precision": 0.7470182046453233,
+ "eval_precision_macro": 0.8516367567335341,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8594217120411665,
+ "eval_runtime": 0.2271,
+ "eval_samples_per_second": 717.802,
+ "eval_steps_per_second": 4.404,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5454
+ },
+ {
+ "epoch": 304.0,
+ "eval_accuracy": 0.9228187919463087,
+ "eval_auc": 0.9505587106478812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7579719387755102,
+ "eval_f1_macro": 0.8560304891070873,
+ "eval_loss": 0.23235370218753815,
+ "eval_pr_auc": 0.7701546218803492,
+ "eval_precision": 0.749605802585935,
+ "eval_precision_macro": 0.8528595176474563,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.8593001628616911,
+ "eval_runtime": 0.1849,
+ "eval_samples_per_second": 881.772,
+ "eval_steps_per_second": 5.41,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5472
+ },
+ {
+ "epoch": 305.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9506673837312365,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7583120204603581,
+ "eval_f1_macro": 0.8563023222011585,
+ "eval_loss": 0.23212042450904846,
+ "eval_pr_auc": 0.7706813722507476,
+ "eval_precision": 0.7518225039619651,
+ "eval_precision_macro": 0.8538377341465491,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8588259565825047,
+ "eval_runtime": 0.1758,
+ "eval_samples_per_second": 927.397,
+ "eval_steps_per_second": 5.69,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5490
+ },
+ {
+ "epoch": 305.55555555555554,
+ "grad_norm": 15827.6396484375,
+ "learning_rate": 3.943376017723057e-07,
+ "loss": 0.1954,
+ "step": 5500
+ },
+ {
+ "epoch": 306.0,
+ "eval_accuracy": 0.9233272320520642,
+ "eval_auc": 0.9506862453142154,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7591823698498882,
+ "eval_f1_macro": 0.8567938214370079,
+ "eval_loss": 0.23211389780044556,
+ "eval_pr_auc": 0.7707434518060764,
+ "eval_precision": 0.7519772223979754,
+ "eval_precision_macro": 0.8540585209342515,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.8596019676543512,
+ "eval_runtime": 0.2349,
+ "eval_samples_per_second": 693.884,
+ "eval_steps_per_second": 4.257,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5508
+ },
+ {
+ "epoch": 307.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9506972041080411,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7588075880758808,
+ "eval_f1_macro": 0.8565232326853711,
+ "eval_loss": 0.23213696479797363,
+ "eval_pr_auc": 0.7706724583082463,
+ "eval_precision": 0.7503152585119798,
+ "eval_precision_macro": 0.8533038465207814,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8598442387508907,
+ "eval_runtime": 0.2681,
+ "eval_samples_per_second": 608.072,
+ "eval_steps_per_second": 3.731,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5526
+ },
+ {
+ "epoch": 308.0,
+ "eval_accuracy": 0.9229204799674599,
+ "eval_auc": 0.9507381389986547,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.758521822236381,
+ "eval_f1_macro": 0.8563315143004762,
+ "eval_loss": 0.23207640647888184,
+ "eval_pr_auc": 0.7708824410889222,
+ "eval_precision": 0.7494491658797607,
+ "eval_precision_macro": 0.8528944938003498,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8598847551440492,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.193,
+ "eval_steps_per_second": 3.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5544
+ },
+ {
+ "epoch": 309.0,
+ "eval_accuracy": 0.9233272320520642,
+ "eval_auc": 0.9508327778185136,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7593360995850622,
+ "eval_f1_macro": 0.8568679288369823,
+ "eval_loss": 0.2318853884935379,
+ "eval_pr_auc": 0.7713979747932131,
+ "eval_precision": 0.7516587677725118,
+ "eval_precision_macro": 0.8539545732457663,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7671718800386972,
+ "eval_recall_macro": 0.8598640833162641,
+ "eval_runtime": 0.3164,
+ "eval_samples_per_second": 515.146,
+ "eval_steps_per_second": 3.16,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5562
+ },
+ {
+ "epoch": 310.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9508632794702453,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7587747287811104,
+ "eval_f1_macro": 0.8565253830188363,
+ "eval_loss": 0.23183651268482208,
+ "eval_pr_auc": 0.7715329415149417,
+ "eval_precision": 0.7508683296495106,
+ "eval_precision_macro": 0.8535264016588866,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.7668494034182521,
+ "eval_recall_macro": 0.8596123035682436,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.635,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5580
+ },
+ {
+ "epoch": 311.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9509030269959862,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7588844621513944,
+ "eval_f1_macro": 0.8565602855810055,
+ "eval_loss": 0.23182560503482819,
+ "eval_pr_auc": 0.7716106695707178,
+ "eval_precision": 0.7501575299306869,
+ "eval_precision_macro": 0.8532526463767658,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8599752965818471,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.932,
+ "eval_steps_per_second": 3.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5598
+ },
+ {
+ "epoch": 312.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9509431638216853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7591589678241478,
+ "eval_f1_macro": 0.8567105868221108,
+ "eval_loss": 0.23177149891853333,
+ "eval_pr_auc": 0.7718084005522707,
+ "eval_precision": 0.7500786905886057,
+ "eval_precision_macro": 0.853269895291271,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8602675927230261,
+ "eval_runtime": 0.2364,
+ "eval_samples_per_second": 689.439,
+ "eval_steps_per_second": 4.23,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5616
+ },
+ {
+ "epoch": 313.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9509867264870173,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7590380633858895,
+ "eval_f1_macro": 0.856634317411552,
+ "eval_loss": 0.23171813786029816,
+ "eval_pr_auc": 0.771954368377823,
+ "eval_precision": 0.749842668344871,
+ "eval_precision_macro": 0.8531505640086999,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8602374122437602,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.85,
+ "eval_steps_per_second": 3.858,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5634
+ },
+ {
+ "epoch": 314.0,
+ "eval_accuracy": 0.9228696359568843,
+ "eval_auc": 0.9509922740114228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7584010192705845,
+ "eval_f1_macro": 0.8562552937959844,
+ "eval_loss": 0.23173367977142334,
+ "eval_pr_auc": 0.7720376545527768,
+ "eval_precision": 0.7492133417243549,
+ "eval_precision_macro": 0.8527752578846153,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8598545746647831,
+ "eval_runtime": 0.2664,
+ "eval_samples_per_second": 611.795,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5652
+ },
+ {
+ "epoch": 315.0,
+ "eval_accuracy": 0.9233780760626398,
+ "eval_auc": 0.9510762654774227,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7595340673368438,
+ "eval_f1_macro": 0.8569813431425517,
+ "eval_loss": 0.23158977925777435,
+ "eval_pr_auc": 0.7725350282702966,
+ "eval_precision": 0.7517372078332281,
+ "eval_precision_macro": 0.8540227670483556,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8600253216264866,
+ "eval_runtime": 0.2525,
+ "eval_samples_per_second": 645.435,
+ "eval_steps_per_second": 3.96,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5670
+ },
+ {
+ "epoch": 316.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9511527434542277,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7601405301820504,
+ "eval_f1_macro": 0.8573636072933594,
+ "eval_loss": 0.23149563372135162,
+ "eval_pr_auc": 0.7728791764414321,
+ "eval_precision": 0.7529262891490035,
+ "eval_precision_macro": 0.8546239248495366,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8601762240228168,
+ "eval_runtime": 0.2078,
+ "eval_samples_per_second": 784.391,
+ "eval_steps_per_second": 4.812,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5688
+ },
+ {
+ "epoch": 317.0,
+ "eval_accuracy": 0.923581452104942,
+ "eval_auc": 0.9511918583675363,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7598657932577089,
+ "eval_f1_macro": 0.8572131820235396,
+ "eval_loss": 0.23143813014030457,
+ "eval_pr_auc": 0.7730966214358813,
+ "eval_precision": 0.7530082330588981,
+ "eval_precision_macro": 0.8546082958147307,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7668494034182521,
+ "eval_recall_macro": 0.8598839278816377,
+ "eval_runtime": 0.2098,
+ "eval_samples_per_second": 777.022,
+ "eval_steps_per_second": 4.767,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5706
+ },
+ {
+ "epoch": 318.0,
+ "eval_accuracy": 0.923479764083791,
+ "eval_auc": 0.9512240340090886,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7597765363128491,
+ "eval_f1_macro": 0.8571341935895835,
+ "eval_loss": 0.23142649233341217,
+ "eval_pr_auc": 0.7731516186784236,
+ "eval_precision": 0.7522123893805309,
+ "eval_precision_macro": 0.8542630051604545,
+ "eval_pred_class_0": 16504,
+ "eval_pred_class_1": 3164,
+ "eval_predicted_binding_ratio": 0.1608704494610535,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8600856825850187,
+ "eval_runtime": 0.2091,
+ "eval_samples_per_second": 779.633,
+ "eval_steps_per_second": 4.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5724
+ },
+ {
+ "epoch": 319.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9512825457928188,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7600638977635783,
+ "eval_f1_macro": 0.8573266640831436,
+ "eval_loss": 0.2313271462917328,
+ "eval_pr_auc": 0.7734563478045352,
+ "eval_precision": 0.7530864197530864,
+ "eval_precision_macro": 0.8546763493762101,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7671718800386972,
+ "eval_recall_macro": 0.8600451661918602,
+ "eval_runtime": 0.2154,
+ "eval_samples_per_second": 756.779,
+ "eval_steps_per_second": 4.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5742
+ },
+ {
+ "epoch": 320.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9513326194999532,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7605048729829046,
+ "eval_f1_macro": 0.8575931868618003,
+ "eval_loss": 0.23122872412204742,
+ "eval_pr_auc": 0.7736938128704516,
+ "eval_precision": 0.7536415452818239,
+ "eval_precision_macro": 0.8549855212781016,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8602667654606148,
+ "eval_runtime": 0.235,
+ "eval_samples_per_second": 693.713,
+ "eval_steps_per_second": 4.256,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5760
+ },
+ {
+ "epoch": 321.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9513477438033324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7602171136653896,
+ "eval_f1_macro": 0.8574005258699469,
+ "eval_loss": 0.2312333732843399,
+ "eval_pr_auc": 0.773709338696213,
+ "eval_precision": 0.7527663610496365,
+ "eval_precision_macro": 0.8545716082739852,
+ "eval_pred_class_0": 16505,
+ "eval_pred_class_1": 3163,
+ "eval_predicted_binding_ratio": 0.16081960545047794,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8603072818537733,
+ "eval_runtime": 0.2276,
+ "eval_samples_per_second": 716.105,
+ "eval_steps_per_second": 4.393,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5778
+ },
+ {
+ "epoch": 322.0,
+ "eval_accuracy": 0.9235306080943665,
+ "eval_auc": 0.9513562889374167,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7602040816326531,
+ "eval_f1_macro": 0.8573582711574831,
+ "eval_loss": 0.23121465742588043,
+ "eval_pr_auc": 0.7737540605936648,
+ "eval_precision": 0.7518133081046988,
+ "eval_precision_macro": 0.854175430193466,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7687842631409223,
+ "eval_recall_macro": 0.8606400943881107,
+ "eval_runtime": 0.2488,
+ "eval_samples_per_second": 655.047,
+ "eval_steps_per_second": 4.019,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5796
+ },
+ {
+ "epoch": 323.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9514191998106756,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7604914632200415,
+ "eval_f1_macro": 0.8575507604890313,
+ "eval_loss": 0.23111233115196228,
+ "eval_pr_auc": 0.7739781495758574,
+ "eval_precision": 0.7526847757422616,
+ "eval_precision_macro": 0.8545874490758332,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8605995779949522,
+ "eval_runtime": 0.2355,
+ "eval_samples_per_second": 692.067,
+ "eval_steps_per_second": 4.246,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5814
+ },
+ {
+ "epoch": 324.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9514517842171841,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7604914632200415,
+ "eval_f1_macro": 0.8575507604890313,
+ "eval_loss": 0.23104801774024963,
+ "eval_pr_auc": 0.7741130008089699,
+ "eval_precision": 0.7526847757422616,
+ "eval_precision_macro": 0.8545874490758332,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8605995779949522,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.295,
+ "eval_steps_per_second": 4.125,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5832
+ },
+ {
+ "epoch": 325.0,
+ "eval_accuracy": 0.9237339841366687,
+ "eval_auc": 0.951504651151519,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7606128311522502,
+ "eval_f1_macro": 0.857627250169412,
+ "eval_loss": 0.23095941543579102,
+ "eval_pr_auc": 0.7744096919390852,
+ "eval_precision": 0.7529225908372827,
+ "eval_precision_macro": 0.8547076748648027,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8606297584742182,
+ "eval_runtime": 0.2175,
+ "eval_samples_per_second": 749.374,
+ "eval_steps_per_second": 4.597,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5850
+ },
+ {
+ "epoch": 326.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.951546208922066,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7606578317100431,
+ "eval_f1_macro": 0.8576669257120046,
+ "eval_loss": 0.23088191449642181,
+ "eval_pr_auc": 0.774641356281408,
+ "eval_precision": 0.7533206831119544,
+ "eval_precision_macro": 0.8548803827531177,
+ "eval_pred_class_0": 16506,
+ "eval_pred_class_1": 3162,
+ "eval_predicted_binding_ratio": 0.16076876143990237,
+ "eval_recall": 0.7681393099000322,
+ "eval_recall_macro": 0.8605288811225278,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.56,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5868
+ },
+ {
+ "epoch": 327.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9515728175742149,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7608105951811074,
+ "eval_f1_macro": 0.8577405662711912,
+ "eval_loss": 0.23083868622779846,
+ "eval_pr_auc": 0.7747608129205691,
+ "eval_precision": 0.7530006317119393,
+ "eval_precision_macro": 0.8547756764183257,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7687842631409223,
+ "eval_recall_macro": 0.8607909967844407,
+ "eval_runtime": 0.2492,
+ "eval_samples_per_second": 654.065,
+ "eval_steps_per_second": 4.013,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5886
+ },
+ {
+ "epoch": 328.0,
+ "eval_accuracy": 0.9238356721578198,
+ "eval_auc": 0.9516116113150578,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7611607142857143,
+ "eval_f1_macro": 0.8579273206076528,
+ "eval_loss": 0.23078228533267975,
+ "eval_pr_auc": 0.7749744562806883,
+ "eval_precision": 0.7527593818984547,
+ "eval_precision_macro": 0.8547393927131844,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7697516930022573,
+ "eval_recall_macro": 0.8612143507565763,
+ "eval_runtime": 0.2121,
+ "eval_samples_per_second": 768.541,
+ "eval_steps_per_second": 4.715,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5904
+ },
+ {
+ "epoch": 329.0,
+ "eval_accuracy": 0.9238865161683953,
+ "eval_auc": 0.9516614027797223,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7614342629482072,
+ "eval_f1_macro": 0.8580771629311073,
+ "eval_loss": 0.2307167798280716,
+ "eval_pr_auc": 0.7752067172424865,
+ "eval_precision": 0.7526780088216761,
+ "eval_precision_macro": 0.8547553982510223,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7703966462431474,
+ "eval_recall_macro": 0.8615066468977552,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.134,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5922
+ },
+ {
+ "epoch": 330.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9516455582714203,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7612841703750794,
+ "eval_f1_macro": 0.857914812460267,
+ "eval_loss": 0.23076769709587097,
+ "eval_pr_auc": 0.775112377066796,
+ "eval_precision": 0.750548417424005,
+ "eval_precision_macro": 0.8538504058352652,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7723315059658175,
+ "eval_recall_macro": 0.8621420914871643,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.508,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5940
+ },
+ {
+ "epoch": 331.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9516963035209824,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7621776504297995,
+ "eval_f1_macro": 0.8584894423868002,
+ "eval_loss": 0.23067235946655273,
+ "eval_pr_auc": 0.7753820734835624,
+ "eval_precision": 0.7525935240490412,
+ "eval_precision_macro": 0.8548556265844769,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.7720090293453724,
+ "eval_recall_macro": 0.8622524774903357,
+ "eval_runtime": 0.1824,
+ "eval_samples_per_second": 893.499,
+ "eval_steps_per_second": 5.482,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5958
+ },
+ {
+ "epoch": 332.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9517492385828104,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7619502868068834,
+ "eval_f1_macro": 0.8583798620967267,
+ "eval_loss": 0.23056790232658386,
+ "eval_pr_auc": 0.775607049366595,
+ "eval_precision": 0.7530708661417322,
+ "eval_precision_macro": 0.8550111500417023,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7710415994840374,
+ "eval_recall_macro": 0.8618593039974662,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.679,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5976
+ },
+ {
+ "epoch": 333.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9517777256072577,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7621475227019276,
+ "eval_f1_macro": 0.8584929210351648,
+ "eval_loss": 0.23053352534770966,
+ "eval_pr_auc": 0.7757600766000483,
+ "eval_precision": 0.7531486146095718,
+ "eval_precision_macro": 0.855079036870636,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7713640761044824,
+ "eval_recall_macro": 0.8620205423076888,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.883,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5994
+ },
+ {
+ "epoch": 333.3333333333333,
+ "grad_norm": 16736.6328125,
+ "learning_rate": 3.021381973636964e-07,
+ "loss": 0.1913,
+ "step": 6000
+ },
+ {
+ "epoch": 334.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9517933365355851,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616473207187152,
+ "eval_f1_macro": 0.8581438407085573,
+ "eval_loss": 0.23052088916301727,
+ "eval_pr_auc": 0.7758187649274527,
+ "eval_precision": 0.751254705144291,
+ "eval_precision_macro": 0.8542074496595242,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7723315059658175,
+ "eval_recall_macro": 0.8622326329249622,
+ "eval_runtime": 0.2573,
+ "eval_samples_per_second": 633.473,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6012
+ },
+ {
+ "epoch": 335.0,
+ "eval_accuracy": 0.9238356721578198,
+ "eval_auc": 0.9518365877609504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7619198982835347,
+ "eval_f1_macro": 0.8582932017746205,
+ "eval_loss": 0.23045583069324493,
+ "eval_pr_auc": 0.7760158372270667,
+ "eval_precision": 0.7511751801942964,
+ "eval_precision_macro": 0.8542244778801185,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8625249290661412,
+ "eval_runtime": 0.219,
+ "eval_samples_per_second": 744.186,
+ "eval_steps_per_second": 4.566,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6030
+ },
+ {
+ "epoch": 336.0,
+ "eval_accuracy": 0.9241915802318487,
+ "eval_auc": 0.9518931822423861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7624661462482077,
+ "eval_f1_macro": 0.8586824817571539,
+ "eval_loss": 0.23035065829753876,
+ "eval_pr_auc": 0.7762652864261658,
+ "eval_precision": 0.753463476070529,
+ "eval_precision_macro": 0.855266785330923,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7716865527249275,
+ "eval_recall_macro": 0.8622119610971773,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.881,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6048
+ },
+ {
+ "epoch": 337.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9519021264089276,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7623289850461342,
+ "eval_f1_macro": 0.8585623745200415,
+ "eval_loss": 0.2303379327058792,
+ "eval_pr_auc": 0.7763353590359,
+ "eval_precision": 0.752276295133438,
+ "eval_precision_macro": 0.8547524774823897,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7726539825862625,
+ "eval_recall_macro": 0.8625145931522488,
+ "eval_runtime": 0.2467,
+ "eval_samples_per_second": 660.828,
+ "eval_steps_per_second": 4.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6066
+ },
+ {
+ "epoch": 338.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9519062432559864,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616325234238527,
+ "eval_f1_macro": 0.8581006831532533,
+ "eval_loss": 0.23036180436611176,
+ "eval_pr_auc": 0.7763420780879606,
+ "eval_precision": 0.7503128911138923,
+ "eval_precision_macro": 0.8538172032062905,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7732989358271525,
+ "eval_recall_macro": 0.8625654454592997,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.319,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6084
+ },
+ {
+ "epoch": 339.0,
+ "eval_accuracy": 0.9237339841366687,
+ "eval_auc": 0.9519363653402588,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7618291521117815,
+ "eval_f1_macro": 0.8582134440261069,
+ "eval_loss": 0.23031854629516602,
+ "eval_pr_auc": 0.776475073481046,
+ "eval_precision": 0.7503909915545824,
+ "eval_precision_macro": 0.8538853142461151,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7736214124475975,
+ "eval_recall_macro": 0.8627266837695222,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.603,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6102
+ },
+ {
+ "epoch": 340.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9519665750170216,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616325234238527,
+ "eval_f1_macro": 0.8581006831532533,
+ "eval_loss": 0.23026354610919952,
+ "eval_pr_auc": 0.7766276763039114,
+ "eval_precision": 0.7503128911138923,
+ "eval_precision_macro": 0.8538172032062905,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7732989358271525,
+ "eval_recall_macro": 0.8625654454592997,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.893,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6120
+ },
+ {
+ "epoch": 341.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9520129601070512,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.230192169547081,
+ "eval_pr_auc": 0.7768138361852162,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2426,
+ "eval_samples_per_second": 671.971,
+ "eval_steps_per_second": 4.123,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6138
+ },
+ {
+ "epoch": 342.0,
+ "eval_accuracy": 0.9239882041895465,
+ "eval_auc": 0.9520488340982072,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7622833518842423,
+ "eval_f1_macro": 0.8585223761569667,
+ "eval_loss": 0.2301386296749115,
+ "eval_pr_auc": 0.7769598633905366,
+ "eval_precision": 0.7518820577164367,
+ "eval_precision_macro": 0.8545818055572474,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626154705039393,
+ "eval_runtime": 0.2473,
+ "eval_samples_per_second": 659.185,
+ "eval_steps_per_second": 4.044,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6156
+ },
+ {
+ "epoch": 343.0,
+ "eval_accuracy": 0.9241407362212731,
+ "eval_auc": 0.952100825107636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7626471524021635,
+ "eval_f1_macro": 0.8587517153841377,
+ "eval_loss": 0.23005619645118713,
+ "eval_pr_auc": 0.7772456933395511,
+ "eval_precision": 0.7525902668759812,
+ "eval_precision_macro": 0.8549397976374689,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8627060119417373,
+ "eval_runtime": 0.2129,
+ "eval_samples_per_second": 765.55,
+ "eval_steps_per_second": 4.697,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6174
+ },
+ {
+ "epoch": 344.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9521250006350455,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.23000310361385345,
+ "eval_pr_auc": 0.7773917675410515,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.276,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6192
+ },
+ {
+ "epoch": 345.0,
+ "eval_accuracy": 0.9241915802318487,
+ "eval_auc": 0.9521700231752211,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7627684964200477,
+ "eval_f1_macro": 0.8588281984687149,
+ "eval_loss": 0.22992061078548431,
+ "eval_pr_auc": 0.777567865132197,
+ "eval_precision": 0.7528266331658291,
+ "eval_precision_macro": 0.8550592763014295,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8627361924210033,
+ "eval_runtime": 0.2684,
+ "eval_samples_per_second": 607.371,
+ "eval_steps_per_second": 3.726,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6210
+ },
+ {
+ "epoch": 346.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9521951914175242,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7624502784407319,
+ "eval_f1_macro": 0.8586388332084449,
+ "eval_loss": 0.22986458241939545,
+ "eval_pr_auc": 0.7777181268262345,
+ "eval_precision": 0.7525125628140703,
+ "eval_precision_macro": 0.8548719086819685,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7726539825862625,
+ "eval_recall_macro": 0.8625447736315148,
+ "eval_runtime": 0.2315,
+ "eval_samples_per_second": 704.221,
+ "eval_steps_per_second": 4.32,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6228
+ },
+ {
+ "epoch": 347.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9522022182817713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.22987791895866394,
+ "eval_pr_auc": 0.7777283636078421,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2021,
+ "eval_samples_per_second": 806.414,
+ "eval_steps_per_second": 4.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6246
+ },
+ {
+ "epoch": 348.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.9522300142987927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7634340222575516,
+ "eval_f1_macro": 0.8592029398342167,
+ "eval_loss": 0.229818195104599,
+ "eval_pr_auc": 0.7778254023800715,
+ "eval_precision": 0.7529005957980558,
+ "eval_precision_macro": 0.8552111450378106,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7742663656884876,
+ "eval_recall_macro": 0.8633509651826273,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 672.887,
+ "eval_steps_per_second": 4.128,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6264
+ },
+ {
+ "epoch": 349.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9522554939810626,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.22978660464286804,
+ "eval_pr_auc": 0.7779049389173774,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.237,
+ "eval_samples_per_second": 687.696,
+ "eval_steps_per_second": 4.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6282
+ },
+ {
+ "epoch": 350.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9522950273918264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638270820089001,
+ "eval_f1_macro": 0.859428369717681,
+ "eval_loss": 0.2297380119562149,
+ "eval_pr_auc": 0.7780796039517833,
+ "eval_precision": 0.7530554685051708,
+ "eval_precision_macro": 0.855346694014678,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7749113189293776,
+ "eval_recall_macro": 0.8636734418030723,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.348,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6300
+ },
+ {
+ "epoch": 351.0,
+ "eval_accuracy": 0.9242932682529998,
+ "eval_auc": 0.9523093341652933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636883034438978,
+ "eval_f1_macro": 0.8593074482256571,
+ "eval_loss": 0.22972844541072845,
+ "eval_pr_auc": 0.7781517759374512,
+ "eval_precision": 0.751875,
+ "eval_precision_macro": 0.8548359697595336,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8639760738581439,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.342,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6318
+ },
+ {
+ "epoch": 352.0,
+ "eval_accuracy": 0.9242424242424242,
+ "eval_auc": 0.952317528929415,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7635671215487146,
+ "eval_f1_macro": 0.8592310391299909,
+ "eval_loss": 0.22972537577152252,
+ "eval_pr_auc": 0.7781868067856106,
+ "eval_precision": 0.7516401124648547,
+ "eval_precision_macro": 0.8547172445484534,
+ "eval_pred_class_0": 16467,
+ "eval_pred_class_1": 3201,
+ "eval_predicted_binding_ratio": 0.16275167785234898,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8639458933788778,
+ "eval_runtime": 0.234,
+ "eval_samples_per_second": 696.7,
+ "eval_steps_per_second": 4.274,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6336
+ },
+ {
+ "epoch": 353.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.9523571596651685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638095238095238,
+ "eval_f1_macro": 0.8593838755989138,
+ "eval_loss": 0.2296588122844696,
+ "eval_pr_auc": 0.7784014772150735,
+ "eval_precision": 0.7521100343857455,
+ "eval_precision_macro": 0.8549547682402951,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8640062543374099,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.953,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6354
+ },
+ {
+ "epoch": 354.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.952381568772553,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638095238095238,
+ "eval_f1_macro": 0.8593838755989138,
+ "eval_loss": 0.22961482405662537,
+ "eval_pr_auc": 0.7785102930543456,
+ "eval_precision": 0.7521100343857455,
+ "eval_precision_macro": 0.8549547682402951,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8640062543374099,
+ "eval_runtime": 0.2671,
+ "eval_samples_per_second": 610.236,
+ "eval_steps_per_second": 3.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6372
+ },
+ {
+ "epoch": 355.0,
+ "eval_accuracy": 0.9242932682529998,
+ "eval_auc": 0.9523906199965831,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763838223632038,
+ "eval_f1_macro": 0.8593796791618457,
+ "eval_loss": 0.2295987904071808,
+ "eval_pr_auc": 0.7785825605072106,
+ "eval_precision": 0.7515605493133583,
+ "eval_precision_macro": 0.8547343562893321,
+ "eval_pred_class_0": 16464,
+ "eval_pred_class_1": 3204,
+ "eval_predicted_binding_ratio": 0.16290420988407567,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.8642381895200568,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.418,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6390
+ },
+ {
+ "epoch": 356.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9524596428791869,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636305833730727,
+ "eval_f1_macro": 0.8593156699585895,
+ "eval_loss": 0.229468435049057,
+ "eval_pr_auc": 0.7789122838326235,
+ "eval_precision": 0.7529780564263323,
+ "eval_precision_macro": 0.8552789299002641,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635122034928499,
+ "eval_runtime": 0.2641,
+ "eval_samples_per_second": 617.191,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6408
+ },
+ {
+ "epoch": 357.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9524861542063461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763751987281399,
+ "eval_f1_macro": 0.8593921831946546,
+ "eval_loss": 0.2294115126132965,
+ "eval_pr_auc": 0.7790142584142796,
+ "eval_precision": 0.7532141737221699,
+ "eval_precision_macro": 0.8553982756468123,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635423839721159,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.426,
+ "eval_steps_per_second": 3.911,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6426
+ },
+ {
+ "epoch": 358.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9524829619466881,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7639771283354511,
+ "eval_f1_macro": 0.8595006707052558,
+ "eval_loss": 0.2294154018163681,
+ "eval_pr_auc": 0.7789936192429844,
+ "eval_precision": 0.7527386541471048,
+ "eval_precision_macro": 0.8552438490185533,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7755562721702677,
+ "eval_recall_macro": 0.8639355574649854,
+ "eval_runtime": 0.2445,
+ "eval_samples_per_second": 666.738,
+ "eval_steps_per_second": 4.09,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6444
+ },
+ {
+ "epoch": 359.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9525149624032593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.2293538749217987,
+ "eval_pr_auc": 0.7791661873069065,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.132,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6462
+ },
+ {
+ "epoch": 360.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9525288798767679,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.22932648658752441,
+ "eval_pr_auc": 0.7792072068588576,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.2714,
+ "eval_samples_per_second": 600.631,
+ "eval_steps_per_second": 3.685,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6480
+ },
+ {
+ "epoch": 361.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9525697758373857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763751987281399,
+ "eval_f1_macro": 0.8593921831946546,
+ "eval_loss": 0.22925521433353424,
+ "eval_pr_auc": 0.77936321808712,
+ "eval_precision": 0.7532141737221699,
+ "eval_precision_macro": 0.8553982756468123,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635423839721159,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.66,
+ "eval_steps_per_second": 4.016,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6498
+ },
+ {
+ "epoch": 361.1111111111111,
+ "grad_norm": 21180.3203125,
+ "learning_rate": 2.1735650901333336e-07,
+ "loss": 0.1893,
+ "step": 6500
+ },
+ {
+ "epoch": 362.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9526172898972942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636016544702513,
+ "eval_f1_macro": 0.8593197379764267,
+ "eval_loss": 0.22917793691158295,
+ "eval_pr_auc": 0.7795955328857882,
+ "eval_precision": 0.7535321821036107,
+ "eval_precision_macro": 0.8555017581027062,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8632802683102028,
+ "eval_runtime": 0.208,
+ "eval_samples_per_second": 783.548,
+ "eval_steps_per_second": 4.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6516
+ },
+ {
+ "epoch": 363.0,
+ "eval_accuracy": 0.924496644295302,
+ "eval_auc": 0.9526382926300437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636479388826993,
+ "eval_f1_macro": 0.8593600478608577,
+ "eval_loss": 0.22912409901618958,
+ "eval_pr_auc": 0.7796869947527124,
+ "eval_precision": 0.7539283469516027,
+ "eval_precision_macro": 0.8556733812884909,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7736214124475975,
+ "eval_recall_macro": 0.8631793909585124,
+ "eval_runtime": 0.2657,
+ "eval_samples_per_second": 613.513,
+ "eval_steps_per_second": 3.764,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6534
+ },
+ {
+ "epoch": 364.0,
+ "eval_accuracy": 0.924496644295302,
+ "eval_auc": 0.952662049659998,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7637231503579952,
+ "eval_f1_macro": 0.859396294249525,
+ "eval_loss": 0.22910362482070923,
+ "eval_pr_auc": 0.779788701256993,
+ "eval_precision": 0.7537688442211056,
+ "eval_precision_macro": 0.8556213791598126,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8633104487894689,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.175,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6552
+ },
+ {
+ "epoch": 365.0,
+ "eval_accuracy": 0.9245474883058775,
+ "eval_auc": 0.9526903420344663,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638446849140674,
+ "eval_f1_macro": 0.8594728689002142,
+ "eval_loss": 0.2290574461221695,
+ "eval_pr_auc": 0.7799354479263911,
+ "eval_precision": 0.7540056550424128,
+ "eval_precision_macro": 0.8557410744123195,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8633406292687349,
+ "eval_runtime": 0.2049,
+ "eval_samples_per_second": 795.329,
+ "eval_steps_per_second": 4.879,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6570
+ },
+ {
+ "epoch": 366.0,
+ "eval_accuracy": 0.9245474883058775,
+ "eval_auc": 0.9527021767531981,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7640699523052464,
+ "eval_f1_macro": 0.8595814265550925,
+ "eval_loss": 0.22903695702552795,
+ "eval_pr_auc": 0.7799769457420497,
+ "eval_precision": 0.753527751646284,
+ "eval_precision_macro": 0.8555854062558139,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7749113189293776,
+ "eval_recall_macro": 0.8637338027616044,
+ "eval_runtime": 0.2118,
+ "eval_samples_per_second": 769.509,
+ "eval_steps_per_second": 4.721,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6588
+ },
+ {
+ "epoch": 367.0,
+ "eval_accuracy": 0.9247000203376042,
+ "eval_auc": 0.9527121622971282,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.764659145081837,
+ "eval_f1_macro": 0.8599193797618125,
+ "eval_loss": 0.22902432084083557,
+ "eval_pr_auc": 0.7800307850119022,
+ "eval_precision": 0.7537593984962406,
+ "eval_precision_macro": 0.8557884149558164,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.864217517692272,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.072,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6606
+ },
+ {
+ "epoch": 368.0,
+ "eval_accuracy": 0.9248017083587553,
+ "eval_auc": 0.9527291941703031,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7649769585253456,
+ "eval_f1_macro": 0.8601085500794873,
+ "eval_loss": 0.22899393737316132,
+ "eval_pr_auc": 0.7800749822284204,
+ "eval_precision": 0.7540726817042607,
+ "eval_precision_macro": 0.85597540373147,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644089364817604,
+ "eval_runtime": 0.2331,
+ "eval_samples_per_second": 699.217,
+ "eval_steps_per_second": 4.29,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6624
+ },
+ {
+ "epoch": 369.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527588782921224,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650238473767885,
+ "eval_f1_macro": 0.8601491566364061,
+ "eval_loss": 0.22894835472106934,
+ "eval_pr_auc": 0.780237083741907,
+ "eval_precision": 0.7544684854186265,
+ "eval_precision_macro": 0.856146798082819,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.86430805913007,
+ "eval_runtime": 0.2655,
+ "eval_samples_per_second": 613.902,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6642
+ },
+ {
+ "epoch": 370.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527729125556185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650238473767885,
+ "eval_f1_macro": 0.8601491566364061,
+ "eval_loss": 0.22892294824123383,
+ "eval_pr_auc": 0.7803195480022762,
+ "eval_precision": 0.7544684854186265,
+ "eval_precision_macro": 0.856146798082819,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.86430805913007,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.873,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6660
+ },
+ {
+ "epoch": 371.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527883872289603,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650985378258105,
+ "eval_f1_macro": 0.8601851483463878,
+ "eval_loss": 0.22889479994773865,
+ "eval_pr_auc": 0.7804143746656889,
+ "eval_precision": 0.7543089940457537,
+ "eval_precision_macro": 0.8560948381043845,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644391169610264,
+ "eval_runtime": 0.2368,
+ "eval_samples_per_second": 688.489,
+ "eval_steps_per_second": 4.224,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6678
+ },
+ {
+ "epoch": 372.0,
+ "eval_accuracy": 0.924954240390482,
+ "eval_auc": 0.9528159885960027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7653418124006359,
+ "eval_f1_macro": 0.860338399996844,
+ "eval_loss": 0.22885586321353912,
+ "eval_pr_auc": 0.7805625189044384,
+ "eval_precision": 0.7547820633427407,
+ "eval_precision_macro": 0.8563339286918206,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644994779195585,
+ "eval_runtime": 0.1849,
+ "eval_samples_per_second": 881.352,
+ "eval_steps_per_second": 5.407,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6696
+ },
+ {
+ "epoch": 373.0,
+ "eval_accuracy": 0.9250559284116331,
+ "eval_auc": 0.9528519015171544,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7655852417302799,
+ "eval_f1_macro": 0.8604917251982311,
+ "eval_loss": 0.22878196835517883,
+ "eval_pr_auc": 0.7807742707975688,
+ "eval_precision": 0.7552557263884531,
+ "eval_precision_macro": 0.8565733155332836,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8645598388780904,
+ "eval_runtime": 0.2727,
+ "eval_samples_per_second": 597.724,
+ "eval_steps_per_second": 3.667,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6714
+ },
+ {
+ "epoch": 374.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9528673372605004,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657070144743121,
+ "eval_f1_macro": 0.8605684154038177,
+ "eval_loss": 0.22877708077430725,
+ "eval_pr_auc": 0.7808321396342274,
+ "eval_precision": 0.7554927809165097,
+ "eval_precision_macro": 0.85669312022406,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8645900193573565,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.842,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6732
+ },
+ {
+ "epoch": 375.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9528682131854067,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659303988558716,
+ "eval_f1_macro": 0.8606760610325117,
+ "eval_loss": 0.2287902534008026,
+ "eval_pr_auc": 0.7808487230478494,
+ "eval_precision": 0.7550125313283208,
+ "eval_precision_macro": 0.8565363700584309,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8649831928502261,
+ "eval_runtime": 0.1856,
+ "eval_samples_per_second": 878.251,
+ "eval_steps_per_second": 5.388,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6750
+ },
+ {
+ "epoch": 376.0,
+ "eval_accuracy": 0.9250050844010576,
+ "eval_auc": 0.9528953279275011,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7655380702591003,
+ "eval_f1_macro": 0.8604509839871686,
+ "eval_loss": 0.228745236992836,
+ "eval_pr_auc": 0.7809833435589818,
+ "eval_precision": 0.754858934169279,
+ "eval_precision_macro": 0.8564014297014619,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864660716229781,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.614,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6768
+ },
+ {
+ "epoch": 377.0,
+ "eval_accuracy": 0.9248017083587553,
+ "eval_auc": 0.9529040482465667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7651262505955216,
+ "eval_f1_macro": 0.8601804865980422,
+ "eval_loss": 0.2287396788597107,
+ "eval_pr_auc": 0.781048839864553,
+ "eval_precision": 0.7537546933667084,
+ "eval_precision_macro": 0.8558720042841312,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7768461786520477,
+ "eval_recall_macro": 0.8646710521436733,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.891,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6786
+ },
+ {
+ "epoch": 378.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9529144620204507,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7653968253968254,
+ "eval_f1_macro": 0.8603288764349426,
+ "eval_loss": 0.22873102128505707,
+ "eval_pr_auc": 0.7810643100928254,
+ "eval_precision": 0.7536730228196311,
+ "eval_precision_macro": 0.8558880628094148,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8649633482848524,
+ "eval_runtime": 0.1843,
+ "eval_samples_per_second": 884.276,
+ "eval_steps_per_second": 5.425,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6804
+ },
+ {
+ "epoch": 379.0,
+ "eval_accuracy": 0.924954240390482,
+ "eval_auc": 0.9529275424990492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657142857142857,
+ "eval_f1_macro": 0.8605178766021483,
+ "eval_loss": 0.22870197892189026,
+ "eval_pr_auc": 0.7811376412515475,
+ "eval_precision": 0.7539856205064083,
+ "eval_precision_macro": 0.8560747217232387,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8651547670743409,
+ "eval_runtime": 0.2724,
+ "eval_samples_per_second": 598.424,
+ "eval_steps_per_second": 3.671,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6822
+ },
+ {
+ "epoch": 380.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9529545793811519,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7660047656870532,
+ "eval_f1_macro": 0.8607118952674847,
+ "eval_loss": 0.22864677011966705,
+ "eval_pr_auc": 0.7812048860219004,
+ "eval_precision": 0.7548528490920476,
+ "eval_precision_macro": 0.8564843339790698,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8651142506811824,
+ "eval_runtime": 0.1856,
+ "eval_samples_per_second": 878.062,
+ "eval_steps_per_second": 5.387,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6840
+ },
+ {
+ "epoch": 381.0,
+ "eval_accuracy": 0.9250559284116331,
+ "eval_auc": 0.9529760687388493,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658087067047982,
+ "eval_f1_macro": 0.8605994081311654,
+ "eval_loss": 0.2286224663257599,
+ "eval_pr_auc": 0.7813066136655398,
+ "eval_precision": 0.7547760726589414,
+ "eval_precision_macro": 0.8564168678924449,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.86495301237096,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.403,
+ "eval_steps_per_second": 3.966,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6858
+ },
+ {
+ "epoch": 382.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9530162639595422,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7656324582338903,
+ "eval_f1_macro": 0.8605324858111449,
+ "eval_loss": 0.22855480015277863,
+ "eval_pr_auc": 0.7815011919374028,
+ "eval_precision": 0.7556532663316583,
+ "eval_precision_macro": 0.856745584876579,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8644589615264,
+ "eval_runtime": 0.267,
+ "eval_samples_per_second": 610.397,
+ "eval_steps_per_second": 3.745,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6876
+ },
+ {
+ "epoch": 383.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530368384623376,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658288259624563,
+ "eval_f1_macro": 0.8606451240251011,
+ "eval_loss": 0.22851014137268066,
+ "eval_pr_auc": 0.7815937686262128,
+ "eval_precision": 0.7557299843014129,
+ "eval_precision_macro": 0.8568129991882603,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646201998366225,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.766,
+ "eval_steps_per_second": 3.729,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6894
+ },
+ {
+ "epoch": 384.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530639240069352,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658288259624563,
+ "eval_f1_macro": 0.8606451240251011,
+ "eval_loss": 0.22846660017967224,
+ "eval_pr_auc": 0.7817366980630457,
+ "eval_precision": 0.7557299843014129,
+ "eval_precision_macro": 0.8568129991882603,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646201998366225,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.734,
+ "eval_steps_per_second": 3.986,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6912
+ },
+ {
+ "epoch": 385.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9530865131370146,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7656324582338903,
+ "eval_f1_macro": 0.8605324858111449,
+ "eval_loss": 0.22843268513679504,
+ "eval_pr_auc": 0.7818405401720232,
+ "eval_precision": 0.7556532663316583,
+ "eval_precision_macro": 0.856745584876579,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8644589615264,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.559,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6930
+ },
+ {
+ "epoch": 386.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530828926474026,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659033078880407,
+ "eval_f1_macro": 0.8606810172942987,
+ "eval_loss": 0.22846029698848724,
+ "eval_pr_auc": 0.781765942321457,
+ "eval_precision": 0.7555695010982115,
+ "eval_precision_macro": 0.8567605408530922,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864751257667579,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.151,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6948
+ },
+ {
+ "epoch": 387.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9530829315773984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7665184243964421,
+ "eval_f1_macro": 0.8610134494863566,
+ "eval_loss": 0.22847168147563934,
+ "eval_pr_auc": 0.7818060776753192,
+ "eval_precision": 0.7552425665101722,
+ "eval_precision_macro": 0.8567386267869261,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8654669077808935,
+ "eval_runtime": 0.2089,
+ "eval_samples_per_second": 780.429,
+ "eval_steps_per_second": 4.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6966
+ },
+ {
+ "epoch": 388.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.953101403860419,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668838391863976,
+ "eval_f1_macro": 0.861243571985536,
+ "eval_loss": 0.2284410148859024,
+ "eval_pr_auc": 0.7818375459402719,
+ "eval_precision": 0.7559523809523809,
+ "eval_precision_macro": 0.8570973363853918,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8655574492186915,
+ "eval_runtime": 0.2357,
+ "eval_samples_per_second": 691.702,
+ "eval_steps_per_second": 4.244,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6984
+ },
+ {
+ "epoch": 388.8888888888889,
+ "grad_norm": 17393.9921875,
+ "learning_rate": 1.4317094954644378e-07,
+ "loss": 0.1876,
+ "step": 7000
+ },
+ {
+ "epoch": 389.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9531112726143616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671537484116899,
+ "eval_f1_macro": 0.8613916441816318,
+ "eval_loss": 0.22843530774116516,
+ "eval_pr_auc": 0.7818710932290109,
+ "eval_precision": 0.755868544600939,
+ "eval_precision_macro": 0.8571123212290193,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8658497453598706,
+ "eval_runtime": 0.2628,
+ "eval_samples_per_second": 620.309,
+ "eval_steps_per_second": 3.806,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7002
+ },
+ {
+ "epoch": 390.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.9531541734697645,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7664175544601686,
+ "eval_f1_macro": 0.8609828565716282,
+ "eval_loss": 0.2283545583486557,
+ "eval_pr_auc": 0.7820972392670548,
+ "eval_precision": 0.7559598494353826,
+ "eval_precision_macro": 0.8570151188924486,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651039147672901,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.253,
+ "eval_steps_per_second": 3.854,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7020
+ },
+ {
+ "epoch": 391.0,
+ "eval_accuracy": 0.9253609924750864,
+ "eval_auc": 0.9531671274258764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7665394402035624,
+ "eval_f1_macro": 0.8610596014864338,
+ "eval_loss": 0.22832486033439636,
+ "eval_pr_auc": 0.7821584295330316,
+ "eval_precision": 0.7561970505177282,
+ "eval_precision_macro": 0.8571349914927091,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.865134095246556,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.118,
+ "eval_steps_per_second": 4.123,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7038
+ },
+ {
+ "epoch": 392.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.953186076601346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661469933184856,
+ "eval_f1_macro": 0.8608344648891975,
+ "eval_loss": 0.22829268872737885,
+ "eval_pr_auc": 0.7822636305739935,
+ "eval_precision": 0.756043956043956,
+ "eval_precision_macro": 0.8570003193433394,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864811618626111,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.357,
+ "eval_steps_per_second": 4.149,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7056
+ },
+ {
+ "epoch": 393.0,
+ "eval_accuracy": 0.9252084604433598,
+ "eval_auc": 0.9532213958400613,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.765950676213206,
+ "eval_f1_macro": 0.860721851071415,
+ "eval_loss": 0.2282164841890335,
+ "eval_pr_auc": 0.782458419213003,
+ "eval_precision": 0.7559673366834171,
+ "eval_precision_macro": 0.8569329524960401,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646503803158885,
+ "eval_runtime": 0.21,
+ "eval_samples_per_second": 776.053,
+ "eval_steps_per_second": 4.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7074
+ },
+ {
+ "epoch": 394.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9532409095004704,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657542966263526,
+ "eval_f1_macro": 0.8606092068875439,
+ "eval_loss": 0.22819304466247559,
+ "eval_pr_auc": 0.7825471573946872,
+ "eval_precision": 0.7558906691800189,
+ "eval_precision_macro": 0.8568655651025967,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.864489142005666,
+ "eval_runtime": 0.1952,
+ "eval_samples_per_second": 835.24,
+ "eval_steps_per_second": 5.124,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7092
+ },
+ {
+ "epoch": 395.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532573574237078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659980897803247,
+ "eval_f1_macro": 0.8607627043564902,
+ "eval_loss": 0.2281719297170639,
+ "eval_pr_auc": 0.7826366824043385,
+ "eval_precision": 0.7563659226658284,
+ "eval_precision_macro": 0.8571057489837905,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8645495029641981,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.649,
+ "eval_steps_per_second": 4.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7110
+ },
+ {
+ "epoch": 396.0,
+ "eval_accuracy": 0.9252084604433598,
+ "eval_auc": 0.9532628173556228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.765950676213206,
+ "eval_f1_macro": 0.860721851071415,
+ "eval_loss": 0.22817298769950867,
+ "eval_pr_auc": 0.7826246984855115,
+ "eval_precision": 0.7559673366834171,
+ "eval_precision_macro": 0.8569329524960401,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646503803158885,
+ "eval_runtime": 0.2265,
+ "eval_samples_per_second": 719.545,
+ "eval_steps_per_second": 4.414,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7128
+ },
+ {
+ "epoch": 397.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532649585053934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7662213740458015,
+ "eval_f1_macro": 0.8608703093903662,
+ "eval_loss": 0.22816696763038635,
+ "eval_pr_auc": 0.7826996340764835,
+ "eval_precision": 0.7558832758079699,
+ "eval_precision_macro": 0.8569477661729006,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7768461786520477,
+ "eval_recall_macro": 0.8649426764570676,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.822,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7146
+ },
+ {
+ "epoch": 398.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532878980054353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661469933184856,
+ "eval_f1_macro": 0.8608344648891975,
+ "eval_loss": 0.22812943160533905,
+ "eval_pr_auc": 0.7828570635819191,
+ "eval_precision": 0.756043956043956,
+ "eval_precision_macro": 0.8570003193433394,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864811618626111,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.707,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7164
+ },
+ {
+ "epoch": 399.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9533086671582098,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7660725652450668,
+ "eval_f1_macro": 0.860798596552099,
+ "eval_loss": 0.2281065434217453,
+ "eval_pr_auc": 0.7829274286728394,
+ "eval_precision": 0.7562048382029531,
+ "eval_precision_macro": 0.8570529802176428,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646805607951544,
+ "eval_runtime": 0.2483,
+ "eval_samples_per_second": 656.497,
+ "eval_steps_per_second": 4.028,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7182
+ },
+ {
+ "epoch": 400.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.953327003186245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661944930765557,
+ "eval_f1_macro": 0.8608753604764983,
+ "eval_loss": 0.22807644307613373,
+ "eval_pr_auc": 0.7830440116061308,
+ "eval_precision": 0.7564424890006285,
+ "eval_precision_macro": 0.8571730824234005,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8647107412744205,
+ "eval_runtime": 0.221,
+ "eval_samples_per_second": 737.532,
+ "eval_steps_per_second": 4.525,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7200
+ },
+ {
+ "epoch": 401.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.9533311297658027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7664175544601686,
+ "eval_f1_macro": 0.8609828565716282,
+ "eval_loss": 0.22808308899402618,
+ "eval_pr_auc": 0.7830491617637381,
+ "eval_precision": 0.7559598494353826,
+ "eval_precision_macro": 0.8570151188924486,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651039147672901,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.406,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7218
+ },
+ {
+ "epoch": 402.0,
+ "eval_accuracy": 0.9253609924750864,
+ "eval_auc": 0.9533512371086481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766390833863781,
+ "eval_f1_macro": 0.8609879862166538,
+ "eval_loss": 0.2280474752187729,
+ "eval_pr_auc": 0.7831121530747555,
+ "eval_precision": 0.7565190072258875,
+ "eval_precision_macro": 0.857240395332689,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864871979584643,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.294,
+ "eval_steps_per_second": 4.125,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7236
+ },
+ {
+ "epoch": 403.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533569987480307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.22804181277751923,
+ "eval_pr_auc": 0.7831234273165448,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.256,
+ "eval_samples_per_second": 636.798,
+ "eval_steps_per_second": 3.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7254
+ },
+ {
+ "epoch": 404.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9533826536152816,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669053301511536,
+ "eval_f1_macro": 0.861289946852225,
+ "eval_loss": 0.2279965728521347,
+ "eval_pr_auc": 0.78327266335354,
+ "eval_precision": 0.7569095477386935,
+ "eval_precision_macro": 0.8574950553544232,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8652246366843541,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.619,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7272
+ },
+ {
+ "epoch": 405.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533846682425657,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.22799374163150787,
+ "eval_pr_auc": 0.7832776890714148,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.577,
+ "eval_steps_per_second": 3.801,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7290
+ },
+ {
+ "epoch": 406.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533931647141554,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.2279902696609497,
+ "eval_pr_auc": 0.7833151045367318,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.2498,
+ "eval_samples_per_second": 652.534,
+ "eval_steps_per_second": 4.003,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7308
+ },
+ {
+ "epoch": 407.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9533809990904591,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7673235855054037,
+ "eval_f1_macro": 0.8615095109466251,
+ "eval_loss": 0.22801372408866882,
+ "eval_pr_auc": 0.7832695956978404,
+ "eval_precision": 0.7565026637417738,
+ "eval_precision_macro": 0.8574040902613707,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8657790484874461,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.354,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7326
+ },
+ {
+ "epoch": 408.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9533805708605049,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671058898237816,
+ "eval_f1_macro": 0.8613505657612415,
+ "eval_loss": 0.2280135303735733,
+ "eval_pr_auc": 0.7832983031509244,
+ "eval_precision": 0.7554721701063164,
+ "eval_precision_macro": 0.8569406995036744,
+ "eval_pred_class_0": 16470,
+ "eval_pred_class_1": 3198,
+ "eval_predicted_binding_ratio": 0.16259914582062232,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.865950622711561,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.9,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7344
+ },
+ {
+ "epoch": 409.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533953253289238,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7672276913305811,
+ "eval_f1_macro": 0.8614272726281818,
+ "eval_loss": 0.22799338400363922,
+ "eval_pr_auc": 0.7833637706556547,
+ "eval_precision": 0.7557084766969033,
+ "eval_precision_macro": 0.857060115344384,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.865980803190827,
+ "eval_runtime": 0.2998,
+ "eval_samples_per_second": 543.699,
+ "eval_steps_per_second": 3.336,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7362
+ },
+ {
+ "epoch": 410.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9534269948805303,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7672496025437202,
+ "eval_f1_macro": 0.8614738601594714,
+ "eval_loss": 0.22793784737586975,
+ "eval_pr_auc": 0.783492627588611,
+ "eval_precision": 0.7566635308874256,
+ "eval_precision_macro": 0.8574567123458305,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8656479906564896,
+ "eval_runtime": 0.2396,
+ "eval_samples_per_second": 680.309,
+ "eval_steps_per_second": 4.174,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7380
+ },
+ {
+ "epoch": 411.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9534421775789035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670535856256957,
+ "eval_f1_macro": 0.8613613920200376,
+ "eval_loss": 0.22790838778018951,
+ "eval_pr_auc": 0.7835790311478947,
+ "eval_precision": 0.7565872020075283,
+ "eval_precision_macro": 0.8573894747901719,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865486752346267,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.852,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7398
+ },
+ {
+ "epoch": 412.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9534503918080233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670535856256957,
+ "eval_f1_macro": 0.8613613920200376,
+ "eval_loss": 0.22789432108402252,
+ "eval_pr_auc": 0.7836065049249683,
+ "eval_precision": 0.7565872020075283,
+ "eval_precision_macro": 0.8573894747901719,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865486752346267,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.102,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7416
+ },
+ {
+ "epoch": 413.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9534621486667634,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669316375198728,
+ "eval_f1_macro": 0.8612846167990333,
+ "eval_loss": 0.2278737723827362,
+ "eval_pr_auc": 0.7836530325514856,
+ "eval_precision": 0.7563499529633114,
+ "eval_precision_macro": 0.857269581736829,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8654565718670011,
+ "eval_runtime": 0.2015,
+ "eval_samples_per_second": 809.132,
+ "eval_steps_per_second": 4.964,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7434
+ },
+ {
+ "epoch": 414.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9534808739947569,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668575063613231,
+ "eval_f1_macro": 0.8612488935825013,
+ "eval_loss": 0.22784681618213654,
+ "eval_pr_auc": 0.7837271248134856,
+ "eval_precision": 0.7565108252274867,
+ "eval_precision_macro": 0.8573222168125176,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653255140360445,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 608.938,
+ "eval_steps_per_second": 3.736,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7452
+ },
+ {
+ "epoch": 415.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535030056973854,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.2278076857328415,
+ "eval_pr_auc": 0.783829043372685,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2388,
+ "eval_samples_per_second": 682.596,
+ "eval_steps_per_second": 4.188,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7470
+ },
+ {
+ "epoch": 416.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9535103634665969,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669053301511536,
+ "eval_f1_macro": 0.861289946852225,
+ "eval_loss": 0.22778868675231934,
+ "eval_pr_auc": 0.7838714010488458,
+ "eval_precision": 0.7569095477386935,
+ "eval_precision_macro": 0.8574950553544232,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8652246366843541,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.742,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7488
+ },
+ {
+ "epoch": 416.6666666666667,
+ "grad_norm": 16683.39453125,
+ "learning_rate": 8.236268949930852e-08,
+ "loss": 0.186,
+ "step": 7500
+ },
+ {
+ "epoch": 417.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535206409854957,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22777557373046875,
+ "eval_pr_auc": 0.7839160806088992,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.595,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7506
+ },
+ {
+ "epoch": 418.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.953516261360965,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22778432071208954,
+ "eval_pr_auc": 0.7838825807566469,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.836,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7524
+ },
+ {
+ "epoch": 419.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.953526694599847,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22777114808559418,
+ "eval_pr_auc": 0.783922204343384,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2628,
+ "eval_samples_per_second": 620.269,
+ "eval_steps_per_second": 3.805,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7542
+ },
+ {
+ "epoch": 420.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535399113334307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22775039076805115,
+ "eval_pr_auc": 0.7839806097795337,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 608.883,
+ "eval_steps_per_second": 3.735,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7560
+ },
+ {
+ "epoch": 421.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535487289774859,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22773513197898865,
+ "eval_pr_auc": 0.7840321361391863,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2732,
+ "eval_samples_per_second": 596.635,
+ "eval_steps_per_second": 3.66,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7578
+ },
+ {
+ "epoch": 422.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9535455951128217,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668097281831188,
+ "eval_f1_macro": 0.8612078600062212,
+ "eval_loss": 0.22774243354797363,
+ "eval_pr_auc": 0.7840612126983214,
+ "eval_precision": 0.7561128526645768,
+ "eval_precision_macro": 0.8571497629022605,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865426391387735,
+ "eval_runtime": 0.1892,
+ "eval_samples_per_second": 861.347,
+ "eval_steps_per_second": 5.284,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7596
+ },
+ {
+ "epoch": 423.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9535476389376027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668097281831188,
+ "eval_f1_macro": 0.8612078600062212,
+ "eval_loss": 0.22774267196655273,
+ "eval_pr_auc": 0.7840628987467491,
+ "eval_precision": 0.7561128526645768,
+ "eval_precision_macro": 0.8571497629022605,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865426391387735,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.36,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7614
+ },
+ {
+ "epoch": 424.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9535528458245448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670057215511761,
+ "eval_f1_macro": 0.8613203162894483,
+ "eval_loss": 0.22773417830467224,
+ "eval_pr_auc": 0.7840834648119666,
+ "eval_precision": 0.756189282356628,
+ "eval_precision_macro": 0.8572170542389439,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8655876296979576,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.927,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7632
+ },
+ {
+ "epoch": 425.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9535637559558758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669316375198728,
+ "eval_f1_macro": 0.8612846167990333,
+ "eval_loss": 0.22771182656288147,
+ "eval_pr_auc": 0.7841366739811415,
+ "eval_precision": 0.7563499529633114,
+ "eval_precision_macro": 0.857269581736829,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8654565718670011,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.859,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7650
+ },
+ {
+ "epoch": 426.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535682523703939,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7673235855054037,
+ "eval_f1_macro": 0.8615095109466251,
+ "eval_loss": 0.22770953178405762,
+ "eval_pr_auc": 0.7841596739317596,
+ "eval_precision": 0.7565026637417738,
+ "eval_precision_macro": 0.8574040902613707,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8657790484874461,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.83,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7668
+ },
+ {
+ "epoch": 427.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535677852104438,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674714104193139,
+ "eval_f1_macro": 0.8615807415292696,
+ "eval_loss": 0.22770845890045166,
+ "eval_pr_auc": 0.7841620948509175,
+ "eval_precision": 0.7561815336463223,
+ "eval_precision_macro": 0.8572991684500658,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.866041164149359,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.948,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7686
+ },
+ {
+ "epoch": 428.0,
+ "eval_accuracy": 0.9256152125279642,
+ "eval_auc": 0.9535825980738566,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767445557145128,
+ "eval_f1_macro": 0.8615862980157476,
+ "eval_loss": 0.22767424583435059,
+ "eval_pr_auc": 0.7842346625186999,
+ "eval_precision": 0.7567398119122257,
+ "eval_precision_macro": 0.8575239295026598,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658092289667121,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.669,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7704
+ },
+ {
+ "epoch": 429.0,
+ "eval_accuracy": 0.9256152125279642,
+ "eval_auc": 0.9535961651774029,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767445557145128,
+ "eval_f1_macro": 0.8615862980157476,
+ "eval_loss": 0.22765418887138367,
+ "eval_pr_auc": 0.7843006481928805,
+ "eval_precision": 0.7567398119122257,
+ "eval_precision_macro": 0.8575239295026598,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658092289667121,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.28,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7722
+ },
+ {
+ "epoch": 430.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536132165155758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22762420773506165,
+ "eval_pr_auc": 0.7844000578756268,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2691,
+ "eval_samples_per_second": 605.732,
+ "eval_steps_per_second": 3.716,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7740
+ },
+ {
+ "epoch": 431.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536164866352254,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22761479020118713,
+ "eval_pr_auc": 0.7844231309752159,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.783,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7758
+ },
+ {
+ "epoch": 432.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536227348995558,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22760987281799316,
+ "eval_pr_auc": 0.7844561314285999,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2144,
+ "eval_samples_per_second": 760.142,
+ "eval_steps_per_second": 4.663,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7776
+ },
+ {
+ "epoch": 433.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536278736590051,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22760248184204102,
+ "eval_pr_auc": 0.7844848515258783,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2745,
+ "eval_samples_per_second": 593.759,
+ "eval_steps_per_second": 3.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7794
+ },
+ {
+ "epoch": 434.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536346669432771,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.22759221494197845,
+ "eval_pr_auc": 0.7845111553142384,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.216,
+ "eval_samples_per_second": 754.683,
+ "eval_steps_per_second": 4.63,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7812
+ },
+ {
+ "epoch": 435.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536369248830353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.2275882065296173,
+ "eval_pr_auc": 0.7845220230482824,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.083,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7830
+ },
+ {
+ "epoch": 436.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536390660328059,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7678855325914149,
+ "eval_f1_macro": 0.8618523468803471,
+ "eval_loss": 0.2275806963443756,
+ "eval_pr_auc": 0.7845372505398349,
+ "eval_precision": 0.7572906867356538,
+ "eval_precision_macro": 0.8578309735638339,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660308282354665,
+ "eval_runtime": 0.2506,
+ "eval_samples_per_second": 650.493,
+ "eval_steps_per_second": 3.991,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7848
+ },
+ {
+ "epoch": 437.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536480004668485,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.2275666743516922,
+ "eval_pr_auc": 0.7845807540266186,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.1853,
+ "eval_samples_per_second": 879.496,
+ "eval_steps_per_second": 5.396,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7866
+ },
+ {
+ "epoch": 438.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536552025160767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22755169868469238,
+ "eval_pr_auc": 0.7846223392518402,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2108,
+ "eval_samples_per_second": 773.198,
+ "eval_steps_per_second": 4.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7884
+ },
+ {
+ "epoch": 439.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.95366627809989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.2275334894657135,
+ "eval_pr_auc": 0.784667391259121,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.311,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7902
+ },
+ {
+ "epoch": 440.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536672708147835,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22753211855888367,
+ "eval_pr_auc": 0.7846795397266301,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2424,
+ "eval_samples_per_second": 672.466,
+ "eval_steps_per_second": 4.126,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7920
+ },
+ {
+ "epoch": 441.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536694119645541,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22753164172172546,
+ "eval_pr_auc": 0.7846730527925044,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.862,
+ "eval_steps_per_second": 3.778,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7938
+ },
+ {
+ "epoch": 442.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536803123633861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7677378300986318,
+ "eval_f1_macro": 0.8617811692096791,
+ "eval_loss": 0.22751472890377045,
+ "eval_pr_auc": 0.7847227034562287,
+ "eval_precision": 0.7576138147566719,
+ "eval_precision_macro": 0.8579369201187351,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657687125735536,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.172,
+ "eval_steps_per_second": 3.75,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7956
+ },
+ {
+ "epoch": 443.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536880691650549,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.22749866545200348,
+ "eval_pr_auc": 0.7847641543874231,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.2405,
+ "eval_samples_per_second": 677.868,
+ "eval_steps_per_second": 4.159,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7974
+ },
+ {
+ "epoch": 444.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536937626769448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.2274913638830185,
+ "eval_pr_auc": 0.7847876605630844,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.896,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7992
+ },
+ {
+ "epoch": 444.44444444444446,
+ "grad_norm": 19008.333984375,
+ "learning_rate": 3.72113927636733e-08,
+ "loss": 0.1854,
+ "step": 8000
+ },
+ {
+ "epoch": 445.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.953696935471605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748790681362152,
+ "eval_pr_auc": 0.7848011703648987,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.861,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8010
+ },
+ {
+ "epoch": 446.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536995243163275,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748683393001556,
+ "eval_pr_auc": 0.7848132010793348,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.648,
+ "eval_steps_per_second": 4.047,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8028
+ },
+ {
+ "epoch": 447.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537016265361022,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748340666294098,
+ "eval_pr_auc": 0.7848243277439235,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2047,
+ "eval_samples_per_second": 796.254,
+ "eval_steps_per_second": 4.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8046
+ },
+ {
+ "epoch": 448.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537030474809498,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.2274865061044693,
+ "eval_pr_auc": 0.7848313481583303,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.69,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8064
+ },
+ {
+ "epoch": 449.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537053443507039,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22748111188411713,
+ "eval_pr_auc": 0.7848426067264029,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2031,
+ "eval_samples_per_second": 802.733,
+ "eval_steps_per_second": 4.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8082
+ },
+ {
+ "epoch": 450.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537081765079003,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22747540473937988,
+ "eval_pr_auc": 0.7848573104950377,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2266,
+ "eval_samples_per_second": 719.273,
+ "eval_steps_per_second": 4.413,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8100
+ },
+ {
+ "epoch": 451.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537093638727732,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22747638821601868,
+ "eval_pr_auc": 0.7848599055730325,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.395,
+ "eval_steps_per_second": 3.966,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8118
+ },
+ {
+ "epoch": 452.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953713694834809,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746768593788147,
+ "eval_pr_auc": 0.7848753398216984,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2493,
+ "eval_samples_per_second": 653.726,
+ "eval_steps_per_second": 4.011,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8136
+ },
+ {
+ "epoch": 453.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537144345047297,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746726870536804,
+ "eval_pr_auc": 0.7848749928109487,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.248,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8154
+ },
+ {
+ "epoch": 454.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537162252845378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746579349040985,
+ "eval_pr_auc": 0.7848822771158814,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.74,
+ "eval_steps_per_second": 4.06,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8172
+ },
+ {
+ "epoch": 455.0,
+ "eval_accuracy": 0.9260219646125687,
+ "eval_auc": 0.9537124685399404,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7687172150691464,
+ "eval_f1_macro": 0.8623431740348002,
+ "eval_loss": 0.22747564315795898,
+ "eval_pr_auc": 0.7848568499525364,
+ "eval_precision": 0.7579937304075235,
+ "eval_precision_macro": 0.8582722627034582,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665749041246662,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.446,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8190
+ },
+ {
+ "epoch": 456.0,
+ "eval_accuracy": 0.9260219646125687,
+ "eval_auc": 0.9537148724671828,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7687172150691464,
+ "eval_f1_macro": 0.8623431740348002,
+ "eval_loss": 0.22747227549552917,
+ "eval_pr_auc": 0.7848699531009984,
+ "eval_precision": 0.7579937304075235,
+ "eval_precision_macro": 0.8582722627034582,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665749041246662,
+ "eval_runtime": 0.2171,
+ "eval_samples_per_second": 750.821,
+ "eval_steps_per_second": 4.606,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8208
+ },
+ {
+ "epoch": 457.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537148043396901,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747254371643066,
+ "eval_pr_auc": 0.784871702941944,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2395,
+ "eval_samples_per_second": 680.611,
+ "eval_steps_per_second": 4.176,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8226
+ },
+ {
+ "epoch": 458.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537153006971368,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747036814689636,
+ "eval_pr_auc": 0.7848700532008381,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.347,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8244
+ },
+ {
+ "epoch": 459.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537154856146172,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747208178043365,
+ "eval_pr_auc": 0.7848674676035483,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2162,
+ "eval_samples_per_second": 753.78,
+ "eval_steps_per_second": 4.624,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8262
+ },
+ {
+ "epoch": 460.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537164783295108,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22747138142585754,
+ "eval_pr_auc": 0.7848698987106607,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.29,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8280
+ },
+ {
+ "epoch": 461.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537188335942584,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.2274673730134964,
+ "eval_pr_auc": 0.7848777447449568,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.971,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8298
+ },
+ {
+ "epoch": 462.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537204881190811,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22746412456035614,
+ "eval_pr_auc": 0.7848871874390749,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2183,
+ "eval_samples_per_second": 746.701,
+ "eval_steps_per_second": 4.581,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8316
+ },
+ {
+ "epoch": 463.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537223956888766,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274623066186905,
+ "eval_pr_auc": 0.784896188141376,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.736,
+ "eval_steps_per_second": 3.741,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8334
+ },
+ {
+ "epoch": 464.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537243227236701,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274596393108368,
+ "eval_pr_auc": 0.7849067717237345,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.711,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8352
+ },
+ {
+ "epoch": 465.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537252959735659,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22745810449123383,
+ "eval_pr_auc": 0.7849091375774293,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.717,
+ "eval_steps_per_second": 3.863,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8370
+ },
+ {
+ "epoch": 466.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537274565883342,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274550497531891,
+ "eval_pr_auc": 0.7849133909574995,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.839,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8388
+ },
+ {
+ "epoch": 467.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537294225531237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274521142244339,
+ "eval_pr_auc": 0.7849233363238722,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.858,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8406
+ },
+ {
+ "epoch": 468.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537300065030612,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.227451354265213,
+ "eval_pr_auc": 0.7849296985834374,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.266,
+ "eval_samples_per_second": 612.691,
+ "eval_steps_per_second": 3.759,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8424
+ },
+ {
+ "epoch": 469.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.953731164670437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744858264923096,
+ "eval_pr_auc": 0.7849332304496031,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1831,
+ "eval_samples_per_second": 890.399,
+ "eval_steps_per_second": 5.463,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8442
+ },
+ {
+ "epoch": 470.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537301622230444,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22745059430599213,
+ "eval_pr_auc": 0.7849302221742441,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.282,
+ "eval_samples_per_second": 577.963,
+ "eval_steps_per_second": 3.546,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8460
+ },
+ {
+ "epoch": 471.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.95373253695279,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274470180273056,
+ "eval_pr_auc": 0.7849404295185345,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.882,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8478
+ },
+ {
+ "epoch": 472.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537337243176629,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744475305080414,
+ "eval_pr_auc": 0.7849440080112278,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.737,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8496
+ },
+ {
+ "epoch": 472.22222222222223,
+ "grad_norm": 16415.080078125,
+ "learning_rate": 9.409753403698373e-09,
+ "loss": 0.185,
+ "step": 8500
+ },
+ {
+ "epoch": 473.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537344250575877,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744259238243103,
+ "eval_pr_auc": 0.7849494179086262,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 642.078,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8514
+ },
+ {
+ "epoch": 474.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537351647275085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744259238243103,
+ "eval_pr_auc": 0.7849503915002546,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1794,
+ "eval_samples_per_second": 908.686,
+ "eval_steps_per_second": 5.575,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8532
+ },
+ {
+ "epoch": 475.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537357876074417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744180262088776,
+ "eval_pr_auc": 0.7849530625634429,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1819,
+ "eval_samples_per_second": 895.935,
+ "eval_steps_per_second": 5.497,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8550
+ },
+ {
+ "epoch": 476.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537367997873333,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274399697780609,
+ "eval_pr_auc": 0.7849568926584508,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 633.091,
+ "eval_steps_per_second": 3.884,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8568
+ },
+ {
+ "epoch": 477.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537370139023102,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274392694234848,
+ "eval_pr_auc": 0.784958754067368,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.802,
+ "eval_steps_per_second": 3.747,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8586
+ },
+ {
+ "epoch": 478.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537376562472413,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22743819653987885,
+ "eval_pr_auc": 0.7849582401594454,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2266,
+ "eval_samples_per_second": 719.444,
+ "eval_steps_per_second": 4.414,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8604
+ },
+ {
+ "epoch": 479.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537377925022268,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743773460388184,
+ "eval_pr_auc": 0.7849574280187238,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.921,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8622
+ },
+ {
+ "epoch": 480.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537384932421518,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743669152259827,
+ "eval_pr_auc": 0.7849621773766102,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.492,
+ "eval_steps_per_second": 3.776,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8640
+ },
+ {
+ "epoch": 481.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537382012671831,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743773460388184,
+ "eval_pr_auc": 0.7849637957606732,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.647,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8658
+ },
+ {
+ "epoch": 482.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537380066172038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.227437824010849,
+ "eval_pr_auc": 0.7849623241007161,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1901,
+ "eval_samples_per_second": 857.378,
+ "eval_steps_per_second": 5.26,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8676
+ },
+ {
+ "epoch": 483.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537382791271747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743763029575348,
+ "eval_pr_auc": 0.7849636397572854,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2674,
+ "eval_samples_per_second": 609.618,
+ "eval_steps_per_second": 3.74,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8694
+ },
+ {
+ "epoch": 484.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537387365546257,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743697464466095,
+ "eval_pr_auc": 0.7849651242159179,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.398,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8712
+ },
+ {
+ "epoch": 485.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537391745170787,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274360954761505,
+ "eval_pr_auc": 0.7849672499360805,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1892,
+ "eval_samples_per_second": 861.673,
+ "eval_steps_per_second": 5.286,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8730
+ },
+ {
+ "epoch": 486.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953739330237062,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743603587150574,
+ "eval_pr_auc": 0.784968007514094,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2358,
+ "eval_samples_per_second": 691.342,
+ "eval_steps_per_second": 4.241,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8748
+ },
+ {
+ "epoch": 487.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.95373934970206,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743603587150574,
+ "eval_pr_auc": 0.7849684807581643,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2101,
+ "eval_samples_per_second": 775.889,
+ "eval_steps_per_second": 4.76,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8766
+ },
+ {
+ "epoch": 488.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537393886320558,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274361550807953,
+ "eval_pr_auc": 0.7849697577570401,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.25,
+ "eval_samples_per_second": 652.0,
+ "eval_steps_per_second": 4.0,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8784
+ },
+ {
+ "epoch": 489.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537394859570454,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743597626686096,
+ "eval_pr_auc": 0.784969480914725,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1914,
+ "eval_samples_per_second": 851.633,
+ "eval_steps_per_second": 5.225,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8802
+ },
+ {
+ "epoch": 490.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395540845381,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274358570575714,
+ "eval_pr_auc": 0.784970228255774,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2687,
+ "eval_samples_per_second": 606.685,
+ "eval_steps_per_second": 3.722,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8820
+ },
+ {
+ "epoch": 491.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395443520391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743581235408783,
+ "eval_pr_auc": 0.7849691356929127,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.272,
+ "eval_samples_per_second": 599.26,
+ "eval_steps_per_second": 3.676,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8838
+ },
+ {
+ "epoch": 492.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537396222120308,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743573784828186,
+ "eval_pr_auc": 0.7849709563979171,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2531,
+ "eval_samples_per_second": 644.069,
+ "eval_steps_per_second": 3.951,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8856
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 9000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 500,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 6657228093507264.0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/training_args.bin b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8903358100d3be09ad49078090c6e572b3ddef68
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:499ba8a39afec206dd7194e2d216bf0be2633330bfcda3d90a12ddcbc04cdaca
+size 5368
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/config.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/config.json
@@ -0,0 +1,52 @@
+{
+ "architectures": [
+ "GloMeModelForTokenClassification"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "cdr_weight": 0.0,
+ "class_weights": [
+ 0.1,
+ 0.9
+ ],
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.1,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "use_glome": true,
+ "use_nsa": true,
+ "vocab_size": 36
+}
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/model.safetensors b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..344739af9ae8101f3177280930f96088f9f9607b
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd6b4e359b5d6cb87fd11e6785513fb93cd33e2621e530c3d5d0372c61965079
+size 61385376
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/optimizer.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d8eb584ea45d85a13f7833c0d1ca2a4c9b5ce620
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd7657ee28cb041646b9be46a93baffab2afaf1966b7a9f81a83df51bd473eff
+size 122881658
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/rng_state.pth b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ea4a281732ef8791236faff0159461d69c164230
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae123764811139e2cb14efaf37527d6fd81efe423a4a92fa24d82e5d54b5501c
+size 14244
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scaler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scheduler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a57622d23c8c6d1195ffb892b5e96972f71046de
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2cddf9c2f8aa06961889d4d18e579770b9648558e3b3dd47f47b9be5707aaa0
+size 1064
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/trainer_state.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bbfb4e2ce03cd348800695619849bfc23008822e
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/trainer_state.json
@@ -0,0 +1,12129 @@
+{
+ "best_global_step": 8856,
+ "best_metric": 0.7849709563979171,
+ "best_model_checkpoint": "./results/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856",
+ "epoch": 499.0,
+ "eval_steps": 500,
+ "global_step": 8982,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.3401972747610332,
+ "eval_auc": 0.39064302367564674,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25900759435847653,
+ "eval_f1_macro": 0.3321798728791878,
+ "eval_loss": 1.0617570877075195,
+ "eval_pr_auc": 0.1212308124824295,
+ "eval_precision": 0.15736885928393005,
+ "eval_precision_macro": 0.49944165947453734,
+ "eval_pred_class_0": 5256,
+ "eval_pred_class_1": 14412,
+ "eval_predicted_binding_ratio": 0.7327638804148872,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.4991767473782156,
+ "eval_runtime": 0.304,
+ "eval_samples_per_second": 536.239,
+ "eval_steps_per_second": 3.29,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 18
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.341010778930242,
+ "eval_auc": 0.39081343973238586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2590750585948665,
+ "eval_f1_macro": 0.33285209231533375,
+ "eval_loss": 1.0604556798934937,
+ "eval_pr_auc": 0.12126612292918731,
+ "eval_precision": 0.1574485825458588,
+ "eval_precision_macro": 0.4995923731531417,
+ "eval_pred_class_0": 5276,
+ "eval_pred_class_1": 14392,
+ "eval_predicted_binding_ratio": 0.731747000203376,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.4993975193845588,
+ "eval_runtime": 0.2793,
+ "eval_samples_per_second": 583.516,
+ "eval_steps_per_second": 3.58,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 36
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.34223103518405534,
+ "eval_auc": 0.3911369382652214,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2591765446944969,
+ "eval_f1_macro": 0.33385837704253485,
+ "eval_loss": 1.058252215385437,
+ "eval_pr_auc": 0.12133107613942488,
+ "eval_precision": 0.15756858376270713,
+ "eval_precision_macro": 0.4998170849458089,
+ "eval_pred_class_0": 5306,
+ "eval_pred_class_1": 14362,
+ "eval_predicted_binding_ratio": 0.7302216798861094,
+ "eval_recall": 0.7297645920670751,
+ "eval_recall_macro": 0.49972867739407356,
+ "eval_runtime": 0.2676,
+ "eval_samples_per_second": 609.165,
+ "eval_steps_per_second": 3.737,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 54
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.3441122635753508,
+ "eval_auc": 0.3915867840995182,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.259556882103088,
+ "eval_f1_macro": 0.33544605079873757,
+ "eval_loss": 1.0551481246948242,
+ "eval_pr_auc": 0.12142208631760734,
+ "eval_precision": 0.15788003631031353,
+ "eval_precision_macro": 0.500391299247358,
+ "eval_pred_class_0": 5347,
+ "eval_pred_class_1": 14321,
+ "eval_predicted_binding_ratio": 0.7281370754525117,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.5005832394650029,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.796,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 72
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.3457392719137686,
+ "eval_auc": 0.39218283153314865,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2597791072250345,
+ "eval_f1_macro": 0.3367955302889021,
+ "eval_loss": 1.0511513948440552,
+ "eval_pr_auc": 0.12154600341235242,
+ "eval_precision": 0.15809003710705033,
+ "eval_precision_macro": 0.5007720380521324,
+ "eval_pred_class_0": 5385,
+ "eval_pred_class_1": 14283,
+ "eval_predicted_binding_ratio": 0.7262050030506406,
+ "eval_recall": 0.72815220896485,
+ "eval_recall_macro": 0.5011558413086458,
+ "eval_runtime": 0.2629,
+ "eval_samples_per_second": 620.078,
+ "eval_steps_per_second": 3.804,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 90
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.34873906853772624,
+ "eval_auc": 0.39291782012189097,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.259894840238054,
+ "eval_f1_macro": 0.33921701928376435,
+ "eval_loss": 1.0462485551834106,
+ "eval_pr_auc": 0.1216940029412557,
+ "eval_precision": 0.1583133887089962,
+ "eval_precision_macro": 0.5011632853468087,
+ "eval_pred_class_0": 5462,
+ "eval_pred_class_1": 14206,
+ "eval_predicted_binding_ratio": 0.722290014236323,
+ "eval_recall": 0.7252499193808449,
+ "eval_recall_macro": 0.5017569691067321,
+ "eval_runtime": 0.2393,
+ "eval_samples_per_second": 681.219,
+ "eval_steps_per_second": 4.179,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 108
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.3517897091722595,
+ "eval_auc": 0.3937714770704174,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2599407906193766,
+ "eval_f1_macro": 0.3416488972772128,
+ "eval_loss": 1.0405118465423584,
+ "eval_pr_auc": 0.12187498322705145,
+ "eval_precision": 0.1585020529520034,
+ "eval_precision_macro": 0.5014812682659693,
+ "eval_pred_class_0": 5542,
+ "eval_pred_class_1": 14126,
+ "eval_predicted_binding_ratio": 0.7182224933902787,
+ "eval_recall": 0.7220251531763947,
+ "eval_recall_macro": 0.5022572195531276,
+ "eval_runtime": 0.2765,
+ "eval_samples_per_second": 589.571,
+ "eval_steps_per_second": 3.617,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 126
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.354586129753915,
+ "eval_auc": 0.3947741191129793,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2591338858410179,
+ "eval_f1_macro": 0.3436917965373002,
+ "eval_loss": 1.0338975191116333,
+ "eval_pr_auc": 0.12208733120990471,
+ "eval_precision": 0.1581985320316397,
+ "eval_precision_macro": 0.5009271275952343,
+ "eval_pred_class_0": 5635,
+ "eval_pred_class_1": 14033,
+ "eval_predicted_binding_ratio": 0.713494000406752,
+ "eval_recall": 0.7158980973879394,
+ "eval_recall_macro": 0.5014270471245847,
+ "eval_runtime": 0.2385,
+ "eval_samples_per_second": 683.567,
+ "eval_steps_per_second": 4.194,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 144
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.3598230628431971,
+ "eval_auc": 0.39592411118975185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25913503971756396,
+ "eval_f1_macro": 0.3477761944928628,
+ "eval_loss": 1.0263975858688354,
+ "eval_pr_auc": 0.12233214426039367,
+ "eval_precision": 0.15848567727076435,
+ "eval_precision_macro": 0.5013938604573427,
+ "eval_pred_class_0": 5774,
+ "eval_pred_class_1": 13894,
+ "eval_predicted_binding_ratio": 0.70642668293675,
+ "eval_recall": 0.710093518219929,
+ "eval_recall_macro": 0.502176595531767,
+ "eval_runtime": 0.2745,
+ "eval_samples_per_second": 593.873,
+ "eval_steps_per_second": 3.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 162
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.3636872076469392,
+ "eval_auc": 0.3972021050928084,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25801861623288075,
+ "eval_f1_macro": 0.35051449303788773,
+ "eval_loss": 1.0180495977401733,
+ "eval_pr_auc": 0.12260540333611444,
+ "eval_precision": 0.15807060874618625,
+ "eval_precision_macro": 0.5006720376838353,
+ "eval_pred_class_0": 5902,
+ "eval_pred_class_1": 13766,
+ "eval_predicted_binding_ratio": 0.6999186495830791,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.5010628083511147,
+ "eval_runtime": 0.2708,
+ "eval_samples_per_second": 601.838,
+ "eval_steps_per_second": 3.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 180
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.3691275167785235,
+ "eval_auc": 0.39866621357342186,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.258604206500956,
+ "eval_f1_macro": 0.35478882891419483,
+ "eval_loss": 1.0087939500808716,
+ "eval_pr_auc": 0.12292033936081492,
+ "eval_precision": 0.1587092042537587,
+ "eval_precision_macro": 0.5016983780261003,
+ "eval_pred_class_0": 6033,
+ "eval_pred_class_1": 13635,
+ "eval_predicted_binding_ratio": 0.6932580841976815,
+ "eval_recall": 0.6978394066430184,
+ "eval_recall_macro": 0.5027194256611,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.126,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 198
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.3740593858043523,
+ "eval_auc": 0.4002664991794433,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25823944086280654,
+ "eval_f1_macro": 0.35841740282728696,
+ "eval_loss": 0.9987770318984985,
+ "eval_pr_auc": 0.12326823892822446,
+ "eval_precision": 0.15878778897451096,
+ "eval_precision_macro": 0.5017853397238077,
+ "eval_pred_class_0": 6172,
+ "eval_pred_class_1": 13496,
+ "eval_predicted_binding_ratio": 0.6861907667276794,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.5028947176998165,
+ "eval_runtime": 0.258,
+ "eval_samples_per_second": 631.708,
+ "eval_steps_per_second": 3.876,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 216
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.37980475899938987,
+ "eval_auc": 0.40207323055334293,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25811945018854154,
+ "eval_f1_macro": 0.36265804779890953,
+ "eval_loss": 0.987876832485199,
+ "eval_pr_auc": 0.12366119818610516,
+ "eval_precision": 0.15905854133873024,
+ "eval_precision_macro": 0.5021624301446299,
+ "eval_pred_class_0": 6327,
+ "eval_pred_class_1": 13341,
+ "eval_predicted_binding_ratio": 0.6783099450884685,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.5035528974067893,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.68,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 234
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.3867703884482408,
+ "eval_auc": 0.40404318566725245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2582866982350409,
+ "eval_f1_macro": 0.36779990383041317,
+ "eval_loss": 0.9760332107543945,
+ "eval_pr_auc": 0.12409453800524387,
+ "eval_precision": 0.1595744680851064,
+ "eval_precision_macro": 0.5028818867776484,
+ "eval_pred_class_0": 6508,
+ "eval_pred_class_1": 13160,
+ "eval_predicted_binding_ratio": 0.6691071791742933,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.5048043507851898,
+ "eval_runtime": 0.2717,
+ "eval_samples_per_second": 599.919,
+ "eval_steps_per_second": 3.68,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 252
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.39429530201342283,
+ "eval_auc": 0.40629005957398867,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2577107607950651,
+ "eval_f1_macro": 0.37306889008104693,
+ "eval_loss": 0.9632152915000916,
+ "eval_pr_auc": 0.12458802431940903,
+ "eval_precision": 0.15971578622181032,
+ "eval_precision_macro": 0.5029977740632862,
+ "eval_pred_class_0": 6720,
+ "eval_pred_class_1": 12948,
+ "eval_predicted_binding_ratio": 0.6583282489322758,
+ "eval_recall": 0.6668816510802967,
+ "eval_recall_macro": 0.5050772111259515,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.248,
+ "eval_steps_per_second": 3.952,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 270
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.4020744356314826,
+ "eval_auc": 0.408681470822737,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25738822935084615,
+ "eval_f1_macro": 0.37848134871797623,
+ "eval_loss": 0.9496278166770935,
+ "eval_pr_auc": 0.12511500176534787,
+ "eval_precision": 0.16003140950137418,
+ "eval_precision_macro": 0.5033533652151325,
+ "eval_pred_class_0": 6933,
+ "eval_pred_class_1": 12735,
+ "eval_predicted_binding_ratio": 0.6474984746796827,
+ "eval_recall": 0.6572073524669462,
+ "eval_recall_macro": 0.5057630895249562,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 606.014,
+ "eval_steps_per_second": 3.718,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 288
+ },
+ {
+ "epoch": 17.0,
+ "eval_accuracy": 0.408989221069758,
+ "eval_auc": 0.4113766625614338,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.255349135169763,
+ "eval_f1_macro": 0.38271123621844805,
+ "eval_loss": 0.9350630640983582,
+ "eval_pr_auc": 0.1257149536327416,
+ "eval_precision": 0.15932528579422817,
+ "eval_precision_macro": 0.5022775332449281,
+ "eval_pred_class_0": 7159,
+ "eval_pred_class_1": 12509,
+ "eval_predicted_binding_ratio": 0.6360077282896075,
+ "eval_recall": 0.6426959045469204,
+ "eval_recall_macro": 0.5039700323120913,
+ "eval_runtime": 0.2687,
+ "eval_samples_per_second": 606.61,
+ "eval_steps_per_second": 3.722,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 306
+ },
+ {
+ "epoch": 18.0,
+ "eval_accuracy": 0.4161582265609111,
+ "eval_auc": 0.41440477389195646,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2538176619663396,
+ "eval_f1_macro": 0.3871501973338609,
+ "eval_loss": 0.9196970462799072,
+ "eval_pr_auc": 0.12640556118775828,
+ "eval_precision": 0.158935546875,
+ "eval_precision_macro": 0.5016899956597223,
+ "eval_pred_class_0": 7380,
+ "eval_pred_class_1": 12288,
+ "eval_predicted_binding_ratio": 0.62477120195241,
+ "eval_recall": 0.6297968397291196,
+ "eval_recall_macro": 0.5029831666503388,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.105,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 324
+ },
+ {
+ "epoch": 19.0,
+ "eval_accuracy": 0.4237848281472443,
+ "eval_auc": 0.4176888499450513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25100786464873437,
+ "eval_f1_macro": 0.3913994084656603,
+ "eval_loss": 0.9033117294311523,
+ "eval_pr_auc": 0.12714405405007598,
+ "eval_precision": 0.15785536159600996,
+ "eval_precision_macro": 0.5002421610284318,
+ "eval_pred_class_0": 7638,
+ "eval_pred_class_1": 12030,
+ "eval_predicted_binding_ratio": 0.611653447223917,
+ "eval_recall": 0.6123831022250886,
+ "eval_recall_macro": 0.5004331156685895,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.078,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 342
+ },
+ {
+ "epoch": 20.0,
+ "eval_accuracy": 0.4312080536912752,
+ "eval_auc": 0.4212995000006521,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24904343156340203,
+ "eval_f1_macro": 0.39564573885956833,
+ "eval_loss": 0.8857852220535278,
+ "eval_pr_auc": 0.12799421494868934,
+ "eval_precision": 0.1572566971854866,
+ "eval_precision_macro": 0.49948708843014167,
+ "eval_pred_class_0": 7872,
+ "eval_pred_class_1": 11796,
+ "eval_predicted_binding_ratio": 0.5997559487492373,
+ "eval_recall": 0.5981941309255079,
+ "eval_recall_macro": 0.4990729210793411,
+ "eval_runtime": 0.2709,
+ "eval_samples_per_second": 601.789,
+ "eval_steps_per_second": 3.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 360
+ },
+ {
+ "epoch": 21.0,
+ "eval_accuracy": 0.4394956274150905,
+ "eval_auc": 0.42538333442304876,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24493150684931506,
+ "eval_f1_macro": 0.399632635701501,
+ "eval_loss": 0.8671084642410278,
+ "eval_pr_auc": 0.12894871744717554,
+ "eval_precision": 0.15549178189407775,
+ "eval_precision_macro": 0.4973810972146359,
+ "eval_pred_class_0": 8169,
+ "eval_pred_class_1": 11499,
+ "eval_predicted_binding_ratio": 0.5846552776082977,
+ "eval_recall": 0.5765881973556917,
+ "eval_recall_macro": 0.4952114645256155,
+ "eval_runtime": 0.2681,
+ "eval_samples_per_second": 607.95,
+ "eval_steps_per_second": 3.73,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 378
+ },
+ {
+ "epoch": 22.0,
+ "eval_accuracy": 0.44824079723408583,
+ "eval_auc": 0.42976391273864795,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24080033580523297,
+ "eval_f1_macro": 0.4037241835563183,
+ "eval_loss": 0.8476783633232117,
+ "eval_pr_auc": 0.13001972671009082,
+ "eval_precision": 0.15375681229339766,
+ "eval_precision_macro": 0.495462476943159,
+ "eval_pred_class_0": 8475,
+ "eval_pred_class_1": 11193,
+ "eval_predicted_binding_ratio": 0.5690970103721782,
+ "eval_recall": 0.5549822637858756,
+ "eval_recall_macro": 0.491621632285284,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.096,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 396
+ },
+ {
+ "epoch": 23.0,
+ "eval_accuracy": 0.46044335977221884,
+ "eval_auc": 0.4345819960798662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23819095477386934,
+ "eval_f1_macro": 0.4102471738365922,
+ "eval_loss": 0.8271914720535278,
+ "eval_pr_auc": 0.1312038077210987,
+ "eval_precision": 0.15319974143503556,
+ "eval_precision_macro": 0.49502955733365084,
+ "eval_pred_class_0": 8839,
+ "eval_pred_class_1": 10829,
+ "eval_predicted_binding_ratio": 0.5505897905226764,
+ "eval_recall": 0.5349887133182845,
+ "eval_recall_macro": 0.4907393617898237,
+ "eval_runtime": 0.2699,
+ "eval_samples_per_second": 603.817,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 414
+ },
+ {
+ "epoch": 24.0,
+ "eval_accuracy": 0.4719849501728696,
+ "eval_auc": 0.4399397854182523,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2337489854644728,
+ "eval_f1_macro": 0.4154821023975197,
+ "eval_loss": 0.8054794669151306,
+ "eval_pr_auc": 0.13253606290408437,
+ "eval_precision": 0.1515499425947187,
+ "eval_precision_macro": 0.4934724539362483,
+ "eval_pred_class_0": 9216,
+ "eval_pred_class_1": 10452,
+ "eval_predicted_binding_ratio": 0.5314215985356925,
+ "eval_recall": 0.5108029667849081,
+ "eval_recall_macro": 0.48776099326147077,
+ "eval_runtime": 0.2459,
+ "eval_samples_per_second": 662.968,
+ "eval_steps_per_second": 4.067,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 432
+ },
+ {
+ "epoch": 25.0,
+ "eval_accuracy": 0.48713646532438476,
+ "eval_auc": 0.4457222328836341,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23099794160250056,
+ "eval_f1_macro": 0.42313846887516615,
+ "eval_loss": 0.7826969027519226,
+ "eval_pr_auc": 0.13403779679155806,
+ "eval_precision": 0.15125798722044728,
+ "eval_precision_macro": 0.4934698556077371,
+ "eval_pred_class_0": 9652,
+ "eval_pred_class_1": 10016,
+ "eval_predicted_binding_ratio": 0.5092536099247509,
+ "eval_recall": 0.48855207997420186,
+ "eval_recall_macro": 0.48771178574674356,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.772,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 450
+ },
+ {
+ "epoch": 26.0,
+ "eval_accuracy": 0.506152125279642,
+ "eval_auc": 0.452125351005008,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22648721828462212,
+ "eval_f1_macro": 0.43188881620754876,
+ "eval_loss": 0.7587484121322632,
+ "eval_pr_auc": 0.13570124162691763,
+ "eval_precision": 0.15038071065989847,
+ "eval_precision_macro": 0.492983148122742,
+ "eval_pred_class_0": 10212,
+ "eval_pred_class_1": 9456,
+ "eval_predicted_binding_ratio": 0.4807809640024405,
+ "eval_recall": 0.4585617542728152,
+ "eval_recall_macro": 0.48681090671327726,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.566,
+ "eval_steps_per_second": 3.752,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 468
+ },
+ {
+ "epoch": 27.0,
+ "eval_accuracy": 0.529997966239577,
+ "eval_auc": 0.4588746151842906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22371514947934162,
+ "eval_f1_macro": 0.4433436473661838,
+ "eval_loss": 0.7342172861099243,
+ "eval_pr_auc": 0.13749280910612846,
+ "eval_precision": 0.15124332916997843,
+ "eval_precision_macro": 0.4941834913044441,
+ "eval_pred_class_0": 10861,
+ "eval_pred_class_1": 8807,
+ "eval_predicted_binding_ratio": 0.44778320113890585,
+ "eval_recall": 0.4295388584327636,
+ "eval_recall_macro": 0.4891703467029515,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.173,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 486
+ },
+ {
+ "epoch": 27.77777777777778,
+ "grad_norm": 191838.453125,
+ "learning_rate": 5.544444444444443e-07,
+ "loss": 0.954,
+ "step": 500
+ },
+ {
+ "epoch": 28.0,
+ "eval_accuracy": 0.5579621720561317,
+ "eval_auc": 0.46628288633295734,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22208303507516106,
+ "eval_f1_macro": 0.4566736198103078,
+ "eval_loss": 0.7085328102111816,
+ "eval_pr_auc": 0.1397221721421834,
+ "eval_precision": 0.15368421052631578,
+ "eval_precision_macro": 0.4966212823527809,
+ "eval_pred_class_0": 11593,
+ "eval_pred_class_1": 8075,
+ "eval_predicted_binding_ratio": 0.4105653853976002,
+ "eval_recall": 0.400193485972267,
+ "eval_recall_macro": 0.49384334768221605,
+ "eval_runtime": 0.245,
+ "eval_samples_per_second": 665.264,
+ "eval_steps_per_second": 4.081,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 504
+ },
+ {
+ "epoch": 29.0,
+ "eval_accuracy": 0.5890278625177954,
+ "eval_auc": 0.47432292318642716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21926011784023955,
+ "eval_f1_macro": 0.4701862470303913,
+ "eval_loss": 0.6820237636566162,
+ "eval_pr_auc": 0.1419831923592407,
+ "eval_precision": 0.15650854936569222,
+ "eval_precision_macro": 0.4990822386003719,
+ "eval_pred_class_0": 12416,
+ "eval_pred_class_1": 7252,
+ "eval_predicted_binding_ratio": 0.36872076469391907,
+ "eval_recall": 0.36601096420509516,
+ "eval_recall_macro": 0.49839149043235986,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 681.884,
+ "eval_steps_per_second": 4.183,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 522
+ },
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.62385600976205,
+ "eval_auc": 0.4829859859606367,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21797040169133192,
+ "eval_f1_macro": 0.4851734455906117,
+ "eval_loss": 0.6552286148071289,
+ "eval_pr_auc": 0.144533301478986,
+ "eval_precision": 0.16213241075640825,
+ "eval_precision_macro": 0.5032992807407407,
+ "eval_pred_class_0": 13309,
+ "eval_pred_class_1": 6359,
+ "eval_predicted_binding_ratio": 0.32331706324994913,
+ "eval_recall": 0.3324733956788133,
+ "eval_recall_macro": 0.5054351043101014,
+ "eval_runtime": 0.2289,
+ "eval_samples_per_second": 712.13,
+ "eval_steps_per_second": 4.369,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 540
+ },
+ {
+ "epoch": 31.0,
+ "eval_accuracy": 0.6593959731543624,
+ "eval_auc": 0.4923438323703967,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21308586867144366,
+ "eval_f1_macro": 0.49787408315316334,
+ "eval_loss": 0.6283431053161621,
+ "eval_pr_auc": 0.14738118302130468,
+ "eval_precision": 0.16759053954175906,
+ "eval_precision_macro": 0.5068452136541568,
+ "eval_pred_class_0": 14256,
+ "eval_pred_class_1": 5412,
+ "eval_predicted_binding_ratio": 0.2751677852348993,
+ "eval_recall": 0.2924862947436311,
+ "eval_recall_macro": 0.5102800882784371,
+ "eval_runtime": 0.2716,
+ "eval_samples_per_second": 600.185,
+ "eval_steps_per_second": 3.682,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 558
+ },
+ {
+ "epoch": 32.0,
+ "eval_accuracy": 0.6967154769168192,
+ "eval_auc": 0.5026651961769109,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21316449017280042,
+ "eval_f1_macro": 0.5126600281126953,
+ "eval_loss": 0.6013967990875244,
+ "eval_pr_auc": 0.15068393162958252,
+ "eval_precision": 0.18035714285714285,
+ "eval_precision_macro": 0.5146913446706046,
+ "eval_pred_class_0": 15188,
+ "eval_pred_class_1": 4480,
+ "eval_predicted_binding_ratio": 0.2277811673784828,
+ "eval_recall": 0.2605611093195743,
+ "eval_recall_macro": 0.5194578347949956,
+ "eval_runtime": 0.2705,
+ "eval_samples_per_second": 602.56,
+ "eval_steps_per_second": 3.697,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 576
+ },
+ {
+ "epoch": 33.0,
+ "eval_accuracy": 0.7287472035794184,
+ "eval_auc": 0.5136348320064595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20692730786383232,
+ "eval_f1_macro": 0.5216610840892347,
+ "eval_loss": 0.575495719909668,
+ "eval_pr_auc": 0.15441446935423722,
+ "eval_precision": 0.19194704908990623,
+ "eval_precision_macro": 0.5210140431835268,
+ "eval_pred_class_0": 16042,
+ "eval_pred_class_1": 3626,
+ "eval_predicted_binding_ratio": 0.18436038234695953,
+ "eval_recall": 0.22444372782973235,
+ "eval_recall_macro": 0.5237930596654548,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.887,
+ "eval_steps_per_second": 3.778,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 594
+ },
+ {
+ "epoch": 34.0,
+ "eval_accuracy": 0.7577791336180598,
+ "eval_auc": 0.5256758602512032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20013431833445267,
+ "eval_f1_macro": 0.5287070633014385,
+ "eval_loss": 0.5502753853797913,
+ "eval_pr_auc": 0.15881070257620672,
+ "eval_precision": 0.20875656742556917,
+ "eval_precision_macro": 0.5298823579410603,
+ "eval_pred_class_0": 16813,
+ "eval_pred_class_1": 2855,
+ "eval_predicted_binding_ratio": 0.14515965019320723,
+ "eval_recall": 0.19219606578523057,
+ "eval_recall_macro": 0.5279203302306971,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.974,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 612
+ },
+ {
+ "epoch": 35.0,
+ "eval_accuracy": 0.7844213951596501,
+ "eval_auc": 0.5388391234856937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.19575113808801214,
+ "eval_f1_macro": 0.5356397775926205,
+ "eval_loss": 0.5265588164329529,
+ "eval_pr_auc": 0.16395620275178963,
+ "eval_precision": 0.23767848917549517,
+ "eval_precision_macro": 0.5449694383352471,
+ "eval_pred_class_0": 17497,
+ "eval_pred_class_1": 2171,
+ "eval_predicted_binding_ratio": 0.11038234695952817,
+ "eval_recall": 0.16639793614962914,
+ "eval_recall_macro": 0.5332502748895668,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.866,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 630
+ },
+ {
+ "epoch": 36.0,
+ "eval_accuracy": 0.8094366483628228,
+ "eval_auc": 0.5531888075405533,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.19189305735230702,
+ "eval_f1_macro": 0.5419376520838427,
+ "eval_loss": 0.5044229626655579,
+ "eval_pr_auc": 0.16987983494600534,
+ "eval_precision": 0.28952504879635654,
+ "eval_precision_macro": 0.5715178054086024,
+ "eval_pred_class_0": 18131,
+ "eval_pred_class_1": 1537,
+ "eval_predicted_binding_ratio": 0.07814724425462681,
+ "eval_recall": 0.14350209609803288,
+ "eval_recall_macro": 0.5387939646905326,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.446,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 648
+ },
+ {
+ "epoch": 37.0,
+ "eval_accuracy": 0.827791336180598,
+ "eval_auc": 0.5689342779333475,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18796451690242147,
+ "eval_f1_macro": 0.5458235779450257,
+ "eval_loss": 0.4842270016670227,
+ "eval_pr_auc": 0.177326879876991,
+ "eval_precision": 0.3663551401869159,
+ "eval_precision_macro": 0.6103471582212137,
+ "eval_pred_class_0": 18598,
+ "eval_pred_class_1": 1070,
+ "eval_predicted_binding_ratio": 0.05440309131584299,
+ "eval_recall": 0.12641083521444696,
+ "eval_recall_macro": 0.5427430526648682,
+ "eval_runtime": 0.2392,
+ "eval_samples_per_second": 681.483,
+ "eval_steps_per_second": 4.181,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 666
+ },
+ {
+ "epoch": 38.0,
+ "eval_accuracy": 0.8386719544437665,
+ "eval_auc": 0.5868017348062602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1820056715648363,
+ "eval_f1_macro": 0.5462584975699354,
+ "eval_loss": 0.46564891934394836,
+ "eval_pr_auc": 0.18685168882837525,
+ "eval_precision": 0.4537275064267352,
+ "eval_precision_macro": 0.6541268553838282,
+ "eval_pred_class_0": 18890,
+ "eval_pred_class_1": 778,
+ "eval_predicted_binding_ratio": 0.039556640227781166,
+ "eval_recall": 0.11383424701709126,
+ "eval_recall_macro": 0.5440904198204911,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.584,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 684
+ },
+ {
+ "epoch": 39.0,
+ "eval_accuracy": 0.8441631075859264,
+ "eval_auc": 0.6057814605899876,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17982338774417983,
+ "eval_f1_macro": 0.5468627318225942,
+ "eval_loss": 0.4492926001548767,
+ "eval_pr_auc": 0.19848375437748741,
+ "eval_precision": 0.5283018867924528,
+ "eval_precision_macro": 0.6915101279275421,
+ "eval_pred_class_0": 19032,
+ "eval_pred_class_1": 636,
+ "eval_predicted_binding_ratio": 0.03233679072605247,
+ "eval_recall": 0.10835214446952596,
+ "eval_recall_macro": 0.5451219284549598,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.049,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 702
+ },
+ {
+ "epoch": 40.0,
+ "eval_accuracy": 0.8456375838926175,
+ "eval_auc": 0.6262280880815292,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1758957654723127,
+ "eval_f1_macro": 0.5453696262568565,
+ "eval_loss": 0.43461790680885315,
+ "eval_pr_auc": 0.21275175506055685,
+ "eval_precision": 0.5557461406518011,
+ "eval_precision_macro": 0.7051195990133514,
+ "eval_pred_class_0": 19085,
+ "eval_pred_class_1": 583,
+ "eval_predicted_binding_ratio": 0.0296420581655481,
+ "eval_recall": 0.10448242502418574,
+ "eval_recall_macro": 0.544424468382196,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.873,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 720
+ },
+ {
+ "epoch": 41.0,
+ "eval_accuracy": 0.8483323164531218,
+ "eval_auc": 0.6481986497247736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18519530183010108,
+ "eval_f1_macro": 0.5507896621273841,
+ "eval_loss": 0.42107364535331726,
+ "eval_pr_auc": 0.23051421419341214,
+ "eval_precision": 0.6053571428571428,
+ "eval_precision_macro": 0.730405178085469,
+ "eval_pred_class_0": 19108,
+ "eval_pred_class_1": 560,
+ "eval_predicted_binding_ratio": 0.02847264592231035,
+ "eval_recall": 0.10931957433086101,
+ "eval_recall_macro": 0.5479899012476421,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.385,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 738
+ },
+ {
+ "epoch": 42.0,
+ "eval_accuracy": 0.8502135448444174,
+ "eval_auc": 0.6709813300109956,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20292207792207792,
+ "eval_f1_macro": 0.5601310726310726,
+ "eval_loss": 0.4084097743034363,
+ "eval_pr_auc": 0.25093797354762637,
+ "eval_precision": 0.6302521008403361,
+ "eval_precision_macro": 0.7436637739036264,
+ "eval_pred_class_0": 19073,
+ "eval_pred_class_1": 595,
+ "eval_predicted_binding_ratio": 0.03025218629245475,
+ "eval_recall": 0.12092873266688164,
+ "eval_recall_macro": 0.5538246608949184,
+ "eval_runtime": 0.2691,
+ "eval_samples_per_second": 605.746,
+ "eval_steps_per_second": 3.716,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 756
+ },
+ {
+ "epoch": 43.0,
+ "eval_accuracy": 0.8526540573520439,
+ "eval_auc": 0.6936772353365158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22263948497854077,
+ "eval_f1_macro": 0.5706266398157138,
+ "eval_loss": 0.39666271209716797,
+ "eval_pr_auc": 0.2738840395423864,
+ "eval_precision": 0.6618819776714514,
+ "eval_precision_macro": 0.7604089789622948,
+ "eval_pred_class_0": 19041,
+ "eval_pred_class_1": 627,
+ "eval_predicted_binding_ratio": 0.031879194630872486,
+ "eval_recall": 0.13382779748468235,
+ "eval_recall_macro": 0.5605156371379469,
+ "eval_runtime": 0.2268,
+ "eval_samples_per_second": 718.638,
+ "eval_steps_per_second": 4.409,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 774
+ },
+ {
+ "epoch": 44.0,
+ "eval_accuracy": 0.8544844417327638,
+ "eval_auc": 0.7158095511124275,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24325753569539926,
+ "eval_f1_macro": 0.58138013196988,
+ "eval_loss": 0.38576817512512207,
+ "eval_pr_auc": 0.29905525248581355,
+ "eval_precision": 0.6754772393538914,
+ "eval_precision_macro": 0.7681910344870789,
+ "eval_pred_class_0": 18987,
+ "eval_pred_class_1": 681,
+ "eval_predicted_binding_ratio": 0.03462477120195241,
+ "eval_recall": 0.14833924540470816,
+ "eval_recall_macro": 0.5674997367845657,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.061,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 792
+ },
+ {
+ "epoch": 45.0,
+ "eval_accuracy": 0.8575859263778727,
+ "eval_auc": 0.737084336405228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2808729139922978,
+ "eval_f1_macro": 0.60092007766148,
+ "eval_loss": 0.37560486793518066,
+ "eval_pr_auc": 0.3260256629572295,
+ "eval_precision": 0.6889168765743073,
+ "eval_precision_macro": 0.7767992245539758,
+ "eval_pred_class_0": 18874,
+ "eval_pred_class_1": 794,
+ "eval_predicted_binding_ratio": 0.040370144396990035,
+ "eval_recall": 0.1763947113834247,
+ "eval_recall_macro": 0.5807427773130077,
+ "eval_runtime": 0.2685,
+ "eval_samples_per_second": 607.1,
+ "eval_steps_per_second": 3.725,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 810
+ },
+ {
+ "epoch": 46.0,
+ "eval_accuracy": 0.8611450071181614,
+ "eval_auc": 0.7571642141385685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.32451150136037593,
+ "eval_f1_macro": 0.623565358817779,
+ "eval_loss": 0.36611661314964294,
+ "eval_pr_auc": 0.3532860869347882,
+ "eval_precision": 0.6963906581740976,
+ "eval_precision_macro": 0.7829117661264593,
+ "eval_pred_class_0": 18726,
+ "eval_pred_class_1": 942,
+ "eval_predicted_binding_ratio": 0.047895057962172055,
+ "eval_recall": 0.21154466301193164,
+ "eval_recall_macro": 0.5971407144358867,
+ "eval_runtime": 0.2698,
+ "eval_samples_per_second": 604.115,
+ "eval_steps_per_second": 3.706,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 828
+ },
+ {
+ "epoch": 47.0,
+ "eval_accuracy": 0.8646532438478747,
+ "eval_auc": 0.7759390708192488,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3694931312174325,
+ "eval_f1_macro": 0.6468414565354121,
+ "eval_loss": 0.3574466407299042,
+ "eval_pr_auc": 0.380672409235741,
+ "eval_precision": 0.695807314897413,
+ "eval_precision_macro": 0.7853328912870632,
+ "eval_pred_class_0": 18547,
+ "eval_pred_class_1": 1121,
+ "eval_predicted_binding_ratio": 0.05699613585519626,
+ "eval_recall": 0.25153176394711385,
+ "eval_recall_macro": 0.6154743385438473,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.868,
+ "eval_steps_per_second": 3.999,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 846
+ },
+ {
+ "epoch": 48.0,
+ "eval_accuracy": 0.8670937563555013,
+ "eval_auc": 0.7932697219796829,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4004587155963303,
+ "eval_f1_macro": 0.6628608765538834,
+ "eval_loss": 0.34963178634643555,
+ "eval_pr_auc": 0.40879055918048346,
+ "eval_precision": 0.69340746624305,
+ "eval_precision_macro": 0.7861898540406407,
+ "eval_pred_class_0": 18409,
+ "eval_pred_class_1": 1259,
+ "eval_predicted_binding_ratio": 0.06401260931462274,
+ "eval_recall": 0.2815220896485005,
+ "eval_recall_macro": 0.6291113798275701,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.281,
+ "eval_steps_per_second": 3.971,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 864
+ },
+ {
+ "epoch": 49.0,
+ "eval_accuracy": 0.8693308928208257,
+ "eval_auc": 0.8090460638591691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.43491644678979774,
+ "eval_f1_macro": 0.6805201987887128,
+ "eval_loss": 0.3424255847930908,
+ "eval_pr_auc": 0.43548439720530613,
+ "eval_precision": 0.6834830684174154,
+ "eval_precision_macro": 0.783786427463743,
+ "eval_pred_class_0": 18221,
+ "eval_pred_class_1": 1447,
+ "eval_predicted_binding_ratio": 0.07357128330282693,
+ "eval_recall": 0.3189293776201225,
+ "eval_recall_macro": 0.6456420293062284,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.097,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 882
+ },
+ {
+ "epoch": 50.0,
+ "eval_accuracy": 0.8711612772015457,
+ "eval_auc": 0.8231584209269593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.46153846153846156,
+ "eval_f1_macro": 0.6941824562962304,
+ "eval_loss": 0.3358187675476074,
+ "eval_pr_auc": 0.46013674866792464,
+ "eval_precision": 0.6766355140186916,
+ "eval_precision_macro": 0.7825407542966181,
+ "eval_pred_class_0": 18063,
+ "eval_pred_class_1": 1605,
+ "eval_predicted_binding_ratio": 0.0816046369737645,
+ "eval_recall": 0.35020960980328925,
+ "eval_recall_macro": 0.659441136162585,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.797,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 900
+ },
+ {
+ "epoch": 51.0,
+ "eval_accuracy": 0.8745169818995322,
+ "eval_auc": 0.8357514570475526,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.49363972096840375,
+ "eval_f1_macro": 0.7110123043354003,
+ "eval_loss": 0.32985639572143555,
+ "eval_pr_auc": 0.48277553791567623,
+ "eval_precision": 0.6785109983079526,
+ "eval_precision_macro": 0.7862239260888744,
+ "eval_pred_class_0": 17895,
+ "eval_pred_class_1": 1773,
+ "eval_predicted_binding_ratio": 0.0901464307504576,
+ "eval_recall": 0.38793937439535636,
+ "eval_recall_macro": 0.6767668140160521,
+ "eval_runtime": 0.2496,
+ "eval_samples_per_second": 653.0,
+ "eval_steps_per_second": 4.006,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 918
+ },
+ {
+ "epoch": 52.0,
+ "eval_accuracy": 0.8772117144600367,
+ "eval_auc": 0.8463606400457255,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.523574669560071,
+ "eval_f1_macro": 0.7265493507137326,
+ "eval_loss": 0.3246362507343292,
+ "eval_pr_auc": 0.5014434788718165,
+ "eval_precision": 0.6742886178861789,
+ "eval_precision_macro": 0.787031314592807,
+ "eval_pred_class_0": 17700,
+ "eval_pred_class_1": 1968,
+ "eval_predicted_binding_ratio": 0.10006101281269067,
+ "eval_recall": 0.4279264753305385,
+ "eval_recall_macro": 0.6946175504557563,
+ "eval_runtime": 0.2301,
+ "eval_samples_per_second": 708.248,
+ "eval_steps_per_second": 4.345,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 936
+ },
+ {
+ "epoch": 53.0,
+ "eval_accuracy": 0.878991254830181,
+ "eval_auc": 0.8556500280578212,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5445847684653655,
+ "eval_f1_macro": 0.7374052543587455,
+ "eval_loss": 0.3201504647731781,
+ "eval_pr_auc": 0.5184804467620471,
+ "eval_precision": 0.6696470588235294,
+ "eval_precision_macro": 0.786998185969936,
+ "eval_pred_class_0": 17543,
+ "eval_pred_class_1": 2125,
+ "eval_predicted_binding_ratio": 0.10804352247305267,
+ "eval_recall": 0.45888423089326025,
+ "eval_recall_macro": 0.7082554190018906,
+ "eval_runtime": 0.2666,
+ "eval_samples_per_second": 611.344,
+ "eval_steps_per_second": 3.751,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 954
+ },
+ {
+ "epoch": 54.0,
+ "eval_accuracy": 0.8792454748830588,
+ "eval_auc": 0.8636336358823378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5594509367464292,
+ "eval_f1_macro": 0.744742407539513,
+ "eval_loss": 0.31616976857185364,
+ "eval_pr_auc": 0.5331175601979875,
+ "eval_precision": 0.6585152838427948,
+ "eval_precision_macro": 0.7834238290545543,
+ "eval_pred_class_0": 17378,
+ "eval_pred_class_1": 2290,
+ "eval_predicted_binding_ratio": 0.11643278421801911,
+ "eval_recall": 0.48629474363108677,
+ "eval_recall_macro": 0.719546237029523,
+ "eval_runtime": 0.2694,
+ "eval_samples_per_second": 604.964,
+ "eval_steps_per_second": 3.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 972
+ },
+ {
+ "epoch": 55.0,
+ "eval_accuracy": 0.8814317673378076,
+ "eval_auc": 0.8703512791725087,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.578757225433526,
+ "eval_f1_macro": 0.7548815712966447,
+ "eval_loss": 0.31280621886253357,
+ "eval_pr_auc": 0.5453871030590061,
+ "eval_precision": 0.657905544147844,
+ "eval_precision_macro": 0.7854606348952531,
+ "eval_pred_class_0": 17233,
+ "eval_pred_class_1": 2435,
+ "eval_predicted_binding_ratio": 0.12380516575147447,
+ "eval_recall": 0.5166075459529185,
+ "eval_recall_macro": 0.7331634337478723,
+ "eval_runtime": 0.3648,
+ "eval_samples_per_second": 446.874,
+ "eval_steps_per_second": 2.742,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 990
+ },
+ {
+ "epoch": 55.55555555555556,
+ "grad_norm": 18517.669921875,
+ "learning_rate": 9.996314582053105e-07,
+ "loss": 0.4604,
+ "step": 1000
+ },
+ {
+ "epoch": 56.0,
+ "eval_accuracy": 0.8834146837502542,
+ "eval_auc": 0.8759527216222862,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5956621407159232,
+ "eval_f1_macro": 0.7637749289648232,
+ "eval_loss": 0.3100614845752716,
+ "eval_pr_auc": 0.5551596710183998,
+ "eval_precision": 0.6571984435797665,
+ "eval_precision_macro": 0.7873078426812156,
+ "eval_pred_class_0": 17098,
+ "eval_pred_class_1": 2570,
+ "eval_predicted_binding_ratio": 0.1306691071791743,
+ "eval_recall": 0.5446630119316349,
+ "eval_recall_macro": 0.745742503732462,
+ "eval_runtime": 0.2507,
+ "eval_samples_per_second": 650.302,
+ "eval_steps_per_second": 3.99,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1008
+ },
+ {
+ "epoch": 57.0,
+ "eval_accuracy": 0.8840756558877364,
+ "eval_auc": 0.8809651824326759,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6064894718674491,
+ "eval_f1_macro": 0.7692574960553631,
+ "eval_loss": 0.30748215317726135,
+ "eval_pr_auc": 0.5634298069700459,
+ "eval_precision": 0.6524322317118455,
+ "eval_precision_macro": 0.7866284869899434,
+ "eval_pred_class_0": 16975,
+ "eval_pred_class_1": 2693,
+ "eval_predicted_binding_ratio": 0.13692292047996746,
+ "eval_recall": 0.5665914221218962,
+ "eval_recall_macro": 0.7550467824679621,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.163,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1026
+ },
+ {
+ "epoch": 58.0,
+ "eval_accuracy": 0.8849908480780964,
+ "eval_auc": 0.8853361353068065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6121399176954733,
+ "eval_f1_macro": 0.7723127955239544,
+ "eval_loss": 0.30511021614074707,
+ "eval_pr_auc": 0.5712324508006517,
+ "eval_precision": 0.6536067374588063,
+ "eval_precision_macro": 0.7879535133831199,
+ "eval_pred_class_0": 16937,
+ "eval_pred_class_1": 2731,
+ "eval_predicted_binding_ratio": 0.13885499288183853,
+ "eval_recall": 0.5756207674943566,
+ "eval_recall_macro": 0.7592596503615321,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.515,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1044
+ },
+ {
+ "epoch": 59.0,
+ "eval_accuracy": 0.8853467561521253,
+ "eval_auc": 0.8892084338643703,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.616822429906542,
+ "eval_f1_macro": 0.7747051971959543,
+ "eval_loss": 0.3030014634132385,
+ "eval_pr_auc": 0.5778370115776272,
+ "eval_precision": 0.6519396551724138,
+ "eval_precision_macro": 0.7878864350252024,
+ "eval_pred_class_0": 16884,
+ "eval_pred_class_1": 2784,
+ "eval_predicted_binding_ratio": 0.1415497254423429,
+ "eval_recall": 0.5852950661077072,
+ "eval_recall_macro": 0.7634026486450891,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.008,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1062
+ },
+ {
+ "epoch": 60.0,
+ "eval_accuracy": 0.884685784014643,
+ "eval_auc": 0.8924710108272688,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6202277294038848,
+ "eval_f1_macro": 0.7761251343338811,
+ "eval_loss": 0.3011925220489502,
+ "eval_pr_auc": 0.5832812236308141,
+ "eval_precision": 0.6450714036920934,
+ "eval_precision_macro": 0.7853564436451774,
+ "eval_pred_class_0": 16797,
+ "eval_pred_class_1": 2871,
+ "eval_predicted_binding_ratio": 0.14597315436241612,
+ "eval_recall": 0.5972267010641729,
+ "eval_recall_macro": 0.7678594421600216,
+ "eval_runtime": 0.2437,
+ "eval_samples_per_second": 668.98,
+ "eval_steps_per_second": 4.104,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1080
+ },
+ {
+ "epoch": 61.0,
+ "eval_accuracy": 0.8856518202155786,
+ "eval_auc": 0.8954398707041407,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6242272347535506,
+ "eval_f1_macro": 0.7783964874556335,
+ "eval_loss": 0.29942846298217773,
+ "eval_pr_auc": 0.5886815510653964,
+ "eval_precision": 0.6477115117891817,
+ "eval_precision_macro": 0.7871243450270979,
+ "eval_pred_class_0": 16784,
+ "eval_pred_class_1": 2884,
+ "eval_predicted_binding_ratio": 0.14663412649989832,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.7705297965613797,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.921,
+ "eval_steps_per_second": 3.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1098
+ },
+ {
+ "epoch": 62.0,
+ "eval_accuracy": 0.8865161683953631,
+ "eval_auc": 0.8978366542923133,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6288659793814433,
+ "eval_f1_macro": 0.7809416026191173,
+ "eval_loss": 0.29785510897636414,
+ "eval_pr_auc": 0.593021329597711,
+ "eval_precision": 0.6491589426707861,
+ "eval_precision_macro": 0.7884708470441367,
+ "eval_pred_class_0": 16755,
+ "eval_pred_class_1": 2913,
+ "eval_predicted_binding_ratio": 0.14810860280658938,
+ "eval_recall": 0.6098032892615285,
+ "eval_recall_macro": 0.7740571948209012,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.719,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1116
+ },
+ {
+ "epoch": 63.0,
+ "eval_accuracy": 0.8871262965222697,
+ "eval_auc": 0.8998632136201572,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6315964155326916,
+ "eval_f1_macro": 0.7824748814379159,
+ "eval_loss": 0.2964444160461426,
+ "eval_pr_auc": 0.5970041919243015,
+ "eval_precision": 0.6505982905982906,
+ "eval_precision_macro": 0.789523000044412,
+ "eval_pred_class_0": 16743,
+ "eval_pred_class_1": 2925,
+ "eval_predicted_binding_ratio": 0.14871873093349602,
+ "eval_recall": 0.6136730087068688,
+ "eval_recall_macro": 0.7759920545435715,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.792,
+ "eval_steps_per_second": 3.71,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1134
+ },
+ {
+ "epoch": 64.0,
+ "eval_accuracy": 0.8876855806386008,
+ "eval_auc": 0.9017429582012333,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6356589147286822,
+ "eval_f1_macro": 0.7846343742639293,
+ "eval_loss": 0.2951850891113281,
+ "eval_pr_auc": 0.6005268804358049,
+ "eval_precision": 0.650573936529372,
+ "eval_precision_macro": 0.7901498917652248,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.7794694277584535,
+ "eval_runtime": 0.2675,
+ "eval_samples_per_second": 609.376,
+ "eval_steps_per_second": 3.739,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1152
+ },
+ {
+ "epoch": 65.0,
+ "eval_accuracy": 0.8881940207443563,
+ "eval_auc": 0.9033021142666618,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6377861966727063,
+ "eval_f1_macro": 0.785840340182137,
+ "eval_loss": 0.2939698398113251,
+ "eval_pr_auc": 0.6035211605243039,
+ "eval_precision": 0.6518518518518519,
+ "eval_precision_macro": 0.7910415086304414,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6243147371815544,
+ "eval_recall_macro": 0.7809507530297222,
+ "eval_runtime": 0.2684,
+ "eval_samples_per_second": 607.415,
+ "eval_steps_per_second": 3.726,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1170
+ },
+ {
+ "epoch": 66.0,
+ "eval_accuracy": 0.8882448647549319,
+ "eval_auc": 0.9048048023731414,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6389618922470434,
+ "eval_f1_macro": 0.7864263262967652,
+ "eval_loss": 0.29283198714256287,
+ "eval_pr_auc": 0.6066927627742578,
+ "eval_precision": 0.6511550050217609,
+ "eval_precision_macro": 0.7909273016835919,
+ "eval_pred_class_0": 16681,
+ "eval_pred_class_1": 2987,
+ "eval_predicted_binding_ratio": 0.1518710595891804,
+ "eval_recall": 0.6272170267655595,
+ "eval_recall_macro": 0.7821604539875966,
+ "eval_runtime": 0.2764,
+ "eval_samples_per_second": 589.68,
+ "eval_steps_per_second": 3.618,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1188
+ },
+ {
+ "epoch": 67.0,
+ "eval_accuracy": 0.888346552776083,
+ "eval_auc": 0.9061457752769495,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6402359108781127,
+ "eval_f1_macro": 0.7870775124924988,
+ "eval_loss": 0.29169291257858276,
+ "eval_pr_auc": 0.6096183698390041,
+ "eval_precision": 0.6506826506826506,
+ "eval_precision_macro": 0.7909278839971909,
+ "eval_pred_class_0": 16665,
+ "eval_pred_class_1": 3003,
+ "eval_predicted_binding_ratio": 0.15268456375838926,
+ "eval_recall": 0.6301193163495646,
+ "eval_recall_macro": 0.783400335424737,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.516,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1206
+ },
+ {
+ "epoch": 68.0,
+ "eval_accuracy": 0.8887533048606874,
+ "eval_auc": 0.9074730837522218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.641781270464964,
+ "eval_f1_macro": 0.7879665952661885,
+ "eval_loss": 0.2904839515686035,
+ "eval_pr_auc": 0.6127271933864005,
+ "eval_precision": 0.6518124376454939,
+ "eval_precision_macro": 0.7916645766644131,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6320541760722348,
+ "eval_recall_macro": 0.7844281262446042,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.958,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1224
+ },
+ {
+ "epoch": 69.0,
+ "eval_accuracy": 0.889261744966443,
+ "eval_auc": 0.9085174295528148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6431847968545217,
+ "eval_f1_macro": 0.788822778783544,
+ "eval_loss": 0.28959015011787415,
+ "eval_pr_auc": 0.6152976575518759,
+ "eval_precision": 0.6536796536796536,
+ "eval_precision_macro": 0.7926964124983926,
+ "eval_pred_class_0": 16665,
+ "eval_pred_class_1": 3003,
+ "eval_predicted_binding_ratio": 0.15268456375838926,
+ "eval_recall": 0.6330216059335698,
+ "eval_recall_macro": 0.7851231045301337,
+ "eval_runtime": 0.2452,
+ "eval_samples_per_second": 664.894,
+ "eval_steps_per_second": 4.079,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1242
+ },
+ {
+ "epoch": 70.0,
+ "eval_accuracy": 0.889363432987594,
+ "eval_auc": 0.9095093658465239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6446766819072501,
+ "eval_f1_macro": 0.7895790973067505,
+ "eval_loss": 0.2887136936187744,
+ "eval_pr_auc": 0.6176593727552148,
+ "eval_precision": 0.6529937148527952,
+ "eval_precision_macro": 0.7926428472131204,
+ "eval_pred_class_0": 16645,
+ "eval_pred_class_1": 3023,
+ "eval_predicted_binding_ratio": 0.15370144396990035,
+ "eval_recall": 0.636568848758465,
+ "eval_recall_macro": 0.7866251016291872,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.817,
+ "eval_steps_per_second": 3.796,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1260
+ },
+ {
+ "epoch": 71.0,
+ "eval_accuracy": 0.889821029082774,
+ "eval_auc": 0.9104295930879169,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6464349812367434,
+ "eval_f1_macro": 0.7905888279869988,
+ "eval_loss": 0.28783899545669556,
+ "eval_pr_auc": 0.619972501272285,
+ "eval_precision": 0.6542272126816381,
+ "eval_precision_macro": 0.7934597601869728,
+ "eval_pred_class_0": 16640,
+ "eval_pred_class_1": 3028,
+ "eval_predicted_binding_ratio": 0.1539556640227781,
+ "eval_recall": 0.6388261851015802,
+ "eval_recall_macro": 0.7878141307592769,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.334,
+ "eval_steps_per_second": 3.824,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1278
+ },
+ {
+ "epoch": 72.0,
+ "eval_accuracy": 0.8899735611145008,
+ "eval_auc": 0.911281293804153,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.646289637136319,
+ "eval_f1_macro": 0.7905721170208057,
+ "eval_loss": 0.28697267174720764,
+ "eval_pr_auc": 0.6225153426830469,
+ "eval_precision": 0.6552867086509778,
+ "eval_precision_macro": 0.7938916277024632,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.6375362786198001,
+ "eval_recall_macro": 0.7873804408732489,
+ "eval_runtime": 0.2764,
+ "eval_samples_per_second": 589.762,
+ "eval_steps_per_second": 3.618,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1296
+ },
+ {
+ "epoch": 73.0,
+ "eval_accuracy": 0.890736221273134,
+ "eval_auc": 0.911925575502615,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6505122784192552,
+ "eval_f1_macro": 0.7928790035842321,
+ "eval_loss": 0.2863345444202423,
+ "eval_pr_auc": 0.6235765349975187,
+ "eval_precision": 0.6561679790026247,
+ "eval_precision_macro": 0.7949612458190018,
+ "eval_pred_class_0": 16620,
+ "eval_pred_class_1": 3048,
+ "eval_predicted_binding_ratio": 0.1549725442342892,
+ "eval_recall": 0.6449532408900355,
+ "eval_recall_macro": 0.7908474781742385,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.574,
+ "eval_steps_per_second": 3.801,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1314
+ },
+ {
+ "epoch": 74.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9126060328997004,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6506692784851453,
+ "eval_f1_macro": 0.793115428161573,
+ "eval_loss": 0.28558436036109924,
+ "eval_pr_auc": 0.625919370718976,
+ "eval_precision": 0.6588429752066116,
+ "eval_precision_macro": 0.7961342196828587,
+ "eval_pred_class_0": 16643,
+ "eval_pred_class_1": 3025,
+ "eval_predicted_binding_ratio": 0.15380313199105144,
+ "eval_recall": 0.6426959045469204,
+ "eval_recall_macro": 0.7902016976709372,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.167,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1332
+ },
+ {
+ "epoch": 75.0,
+ "eval_accuracy": 0.8916005694529184,
+ "eval_auc": 0.9132873078266985,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6526555881394591,
+ "eval_f1_macro": 0.794217425975266,
+ "eval_loss": 0.28493690490722656,
+ "eval_pr_auc": 0.6278320531638758,
+ "eval_precision": 0.6595324333223576,
+ "eval_precision_macro": 0.7967555738856391,
+ "eval_pred_class_0": 16631,
+ "eval_pred_class_1": 3037,
+ "eval_predicted_binding_ratio": 0.1544132601179581,
+ "eval_recall": 0.6459206707513705,
+ "eval_recall_macro": 0.7917537198146302,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.773,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1350
+ },
+ {
+ "epoch": 76.0,
+ "eval_accuracy": 0.8920073215375229,
+ "eval_auc": 0.9139370494570753,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6551948051948052,
+ "eval_f1_macro": 0.7955863102414826,
+ "eval_loss": 0.28430166840553284,
+ "eval_pr_auc": 0.6292546024902547,
+ "eval_precision": 0.6596927100359594,
+ "eval_precision_macro": 0.7972435493102309,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7939610311131058,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.459,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1368
+ },
+ {
+ "epoch": 77.0,
+ "eval_accuracy": 0.8921598535692495,
+ "eval_auc": 0.9146080371326758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6542787286063569,
+ "eval_f1_macro": 0.7951975553215214,
+ "eval_loss": 0.283497154712677,
+ "eval_pr_auc": 0.6315022943889131,
+ "eval_precision": 0.6615029663810151,
+ "eval_precision_macro": 0.7978670296615908,
+ "eval_pred_class_0": 16634,
+ "eval_pred_class_1": 3034,
+ "eval_predicted_binding_ratio": 0.15426072808623145,
+ "eval_recall": 0.6472105772331506,
+ "eval_recall_macro": 0.7926099364103822,
+ "eval_runtime": 0.2203,
+ "eval_samples_per_second": 740.067,
+ "eval_steps_per_second": 4.54,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1386
+ },
+ {
+ "epoch": 78.0,
+ "eval_accuracy": 0.8924140736221273,
+ "eval_auc": 0.9151027497871649,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655037495924356,
+ "eval_f1_macro": 0.7956531976941219,
+ "eval_loss": 0.2829034626483917,
+ "eval_pr_auc": 0.6331226154536788,
+ "eval_precision": 0.6623804813715793,
+ "eval_precision_macro": 0.7983678781970611,
+ "eval_pred_class_0": 16635,
+ "eval_pred_class_1": 3033,
+ "eval_predicted_binding_ratio": 0.1542098840756559,
+ "eval_recall": 0.6478555304740407,
+ "eval_recall_macro": 0.7930229544686254,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.571,
+ "eval_steps_per_second": 3.899,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1404
+ },
+ {
+ "epoch": 79.0,
+ "eval_accuracy": 0.8925157616432784,
+ "eval_auc": 0.9156585533376076,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6559244791666666,
+ "eval_f1_macro": 0.7961172166862497,
+ "eval_loss": 0.28232645988464355,
+ "eval_pr_auc": 0.6343326075351273,
+ "eval_precision": 0.6621754847190273,
+ "eval_precision_macro": 0.7984260882241754,
+ "eval_pred_class_0": 16625,
+ "eval_pred_class_1": 3043,
+ "eval_predicted_binding_ratio": 0.15471832418141143,
+ "eval_recall": 0.6497903901967107,
+ "eval_recall_macro": 0.7938696624128962,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.665,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1422
+ },
+ {
+ "epoch": 80.0,
+ "eval_accuracy": 0.8929733577384584,
+ "eval_auc": 0.9162826303682348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6572219508223416,
+ "eval_f1_macro": 0.7969043930945569,
+ "eval_loss": 0.28162533044815063,
+ "eval_pr_auc": 0.6363164977912346,
+ "eval_precision": 0.6638157894736842,
+ "eval_precision_macro": 0.7993423426560147,
+ "eval_pred_class_0": 16628,
+ "eval_pred_class_1": 3040,
+ "eval_predicted_binding_ratio": 0.15456579214968477,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.79453446021916,
+ "eval_runtime": 0.272,
+ "eval_samples_per_second": 599.266,
+ "eval_steps_per_second": 3.676,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1440
+ },
+ {
+ "epoch": 81.0,
+ "eval_accuracy": 0.8928208257067317,
+ "eval_auc": 0.9167705886684476,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.657124268054652,
+ "eval_f1_macro": 0.7968036671115732,
+ "eval_loss": 0.2811121940612793,
+ "eval_pr_auc": 0.6378571407612313,
+ "eval_precision": 0.6629471611421069,
+ "eval_precision_macro": 0.7989544782306408,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6514027732989358,
+ "eval_recall_macro": 0.7947060344432748,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.342,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1458
+ },
+ {
+ "epoch": 82.0,
+ "eval_accuracy": 0.893125889770185,
+ "eval_auc": 0.9172343422437541,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6581001951854262,
+ "eval_f1_macro": 0.7973820247953165,
+ "eval_loss": 0.2804972231388092,
+ "eval_pr_auc": 0.6394259907419034,
+ "eval_precision": 0.6639317361339022,
+ "eval_precision_macro": 0.7995370130040789,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6523702031602708,
+ "eval_recall_macro": 0.7952802908117405,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.86,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1476
+ },
+ {
+ "epoch": 83.0,
+ "eval_accuracy": 0.8933801098230628,
+ "eval_auc": 0.9176662705474707,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6597436313483693,
+ "eval_f1_macro": 0.7982647858607822,
+ "eval_loss": 0.28001976013183594,
+ "eval_pr_auc": 0.6407525699560299,
+ "eval_precision": 0.6639451338994121,
+ "eval_precision_macro": 0.7998155152816343,
+ "eval_pred_class_0": 16606,
+ "eval_pred_class_1": 3062,
+ "eval_predicted_binding_ratio": 0.15568436038234695,
+ "eval_recall": 0.6555949693647211,
+ "eval_recall_macro": 0.7967417715176355,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.967,
+ "eval_steps_per_second": 3.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1494
+ },
+ {
+ "epoch": 83.33333333333333,
+ "grad_norm": 11845.0048828125,
+ "learning_rate": 9.86567120987093e-07,
+ "loss": 0.2741,
+ "step": 1500
+ },
+ {
+ "epoch": 84.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9181069872977458,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6592015579357352,
+ "eval_f1_macro": 0.7979494857864604,
+ "eval_loss": 0.2794816195964813,
+ "eval_pr_auc": 0.6421665674351047,
+ "eval_precision": 0.6635086573015354,
+ "eval_precision_macro": 0.799538997766201,
+ "eval_pred_class_0": 16607,
+ "eval_pred_class_1": 3061,
+ "eval_predicted_binding_ratio": 0.15563351637177142,
+ "eval_recall": 0.654950016123831,
+ "eval_recall_macro": 0.7963891144179245,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.121,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1512
+ },
+ {
+ "epoch": 85.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9184344761551537,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6595330739299611,
+ "eval_f1_macro": 0.7981095181516664,
+ "eval_loss": 0.27904370427131653,
+ "eval_pr_auc": 0.6429990318434126,
+ "eval_precision": 0.6631887838278449,
+ "eval_precision_macro": 0.7994577736379149,
+ "eval_pred_class_0": 16601,
+ "eval_pred_class_1": 3067,
+ "eval_predicted_binding_ratio": 0.15593858043522474,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.796782287910794,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.288,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1530
+ },
+ {
+ "epoch": 86.0,
+ "eval_accuracy": 0.8933292658124873,
+ "eval_auc": 0.9189190864757253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6585286458333334,
+ "eval_f1_macro": 0.797660321952579,
+ "eval_loss": 0.27837634086608887,
+ "eval_pr_auc": 0.6447520419594072,
+ "eval_precision": 0.664804469273743,
+ "eval_precision_macro": 0.7999811820052926,
+ "eval_pred_class_0": 16625,
+ "eval_pred_class_1": 3043,
+ "eval_predicted_binding_ratio": 0.15471832418141143,
+ "eval_recall": 0.6523702031602708,
+ "eval_recall_macro": 0.7954010127288045,
+ "eval_runtime": 0.2401,
+ "eval_samples_per_second": 678.873,
+ "eval_steps_per_second": 4.165,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1548
+ },
+ {
+ "epoch": 87.0,
+ "eval_accuracy": 0.8936851738865161,
+ "eval_auc": 0.9193183038504471,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6602761982128351,
+ "eval_f1_macro": 0.7986291029941847,
+ "eval_loss": 0.27788689732551575,
+ "eval_pr_auc": 0.6462039154555116,
+ "eval_precision": 0.6653569089718402,
+ "eval_precision_macro": 0.8005067920325675,
+ "eval_pred_class_0": 16614,
+ "eval_pred_class_1": 3054,
+ "eval_predicted_binding_ratio": 0.15527760829774254,
+ "eval_recall": 0.655272492744276,
+ "eval_recall_macro": 0.7967917965622751,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.0,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1566
+ },
+ {
+ "epoch": 88.0,
+ "eval_accuracy": 0.8941427699816962,
+ "eval_auc": 0.9197055599839506,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.662012987012987,
+ "eval_f1_macro": 0.7996283888525269,
+ "eval_loss": 0.2774609923362732,
+ "eval_pr_auc": 0.6474319229516793,
+ "eval_precision": 0.6665576985943119,
+ "eval_precision_macro": 0.8013082309577014,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.7979808256923646,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.753,
+ "eval_steps_per_second": 4.005,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1584
+ },
+ {
+ "epoch": 89.0,
+ "eval_accuracy": 0.8946003660768761,
+ "eval_auc": 0.9201319018332661,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6636378387149116,
+ "eval_f1_macro": 0.8005736295133055,
+ "eval_loss": 0.27695581316947937,
+ "eval_pr_auc": 0.6490255599400047,
+ "eval_precision": 0.6678641410842586,
+ "eval_precision_macro": 0.8021363340613392,
+ "eval_pred_class_0": 16606,
+ "eval_pred_class_1": 3062,
+ "eval_predicted_binding_ratio": 0.15568436038234695,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.7990387969914977,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 606.045,
+ "eval_steps_per_second": 3.718,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1602
+ },
+ {
+ "epoch": 90.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.920403555344157,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6648262422864566,
+ "eval_f1_macro": 0.8013081720805965,
+ "eval_loss": 0.27657878398895264,
+ "eval_pr_auc": 0.6500340451314033,
+ "eval_precision": 0.6696107294733399,
+ "eval_precision_macro": 0.8030794000144978,
+ "eval_pred_class_0": 16611,
+ "eval_pred_class_1": 3057,
+ "eval_predicted_binding_ratio": 0.1554301403294692,
+ "eval_recall": 0.6601096420509514,
+ "eval_recall_macro": 0.7995725369668047,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.642,
+ "eval_steps_per_second": 3.832,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1620
+ },
+ {
+ "epoch": 91.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9207465675374016,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6654746423927178,
+ "eval_f1_macro": 0.8017284012349317,
+ "eval_loss": 0.27611449360847473,
+ "eval_pr_auc": 0.6512249430736048,
+ "eval_precision": 0.6709275647328745,
+ "eval_precision_macro": 0.8037492731289094,
+ "eval_pred_class_0": 16617,
+ "eval_pred_class_1": 3051,
+ "eval_predicted_binding_ratio": 0.15512507626601588,
+ "eval_recall": 0.6601096420509514,
+ "eval_recall_macro": 0.7997536198424009,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.736,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1638
+ },
+ {
+ "epoch": 92.0,
+ "eval_accuracy": 0.8954647142566605,
+ "eval_auc": 0.9211146506479597,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6662337662337663,
+ "eval_f1_macro": 0.802130627992697,
+ "eval_loss": 0.275691956281662,
+ "eval_pr_auc": 0.6524138573777828,
+ "eval_precision": 0.6708074534161491,
+ "eval_precision_macro": 0.8038244624537546,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8004692699557154,
+ "eval_runtime": 0.2702,
+ "eval_samples_per_second": 603.318,
+ "eval_steps_per_second": 3.701,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1656
+ },
+ {
+ "epoch": 93.0,
+ "eval_accuracy": 0.8956680902989628,
+ "eval_auc": 0.9214137790034065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8024110910186859,
+ "eval_loss": 0.27531710267066956,
+ "eval_pr_auc": 0.6535015844647576,
+ "eval_precision": 0.6716857610474631,
+ "eval_precision_macro": 0.8042712197761243,
+ "eval_pred_class_0": 16613,
+ "eval_pred_class_1": 3055,
+ "eval_predicted_binding_ratio": 0.15532845230831807,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8005899918727795,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.638,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1674
+ },
+ {
+ "epoch": 94.0,
+ "eval_accuracy": 0.8956172462883872,
+ "eval_auc": 0.9217489565349906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6664500406173842,
+ "eval_f1_macro": 0.802288641055505,
+ "eval_loss": 0.27483227849006653,
+ "eval_pr_auc": 0.6546621751515824,
+ "eval_precision": 0.6715782580222659,
+ "eval_precision_macro": 0.8041892734676155,
+ "eval_pred_class_0": 16614,
+ "eval_pred_class_1": 3054,
+ "eval_predicted_binding_ratio": 0.15527760829774254,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.8004287535625569,
+ "eval_runtime": 0.2382,
+ "eval_samples_per_second": 684.396,
+ "eval_steps_per_second": 4.199,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1692
+ },
+ {
+ "epoch": 95.0,
+ "eval_accuracy": 0.8959731543624161,
+ "eval_auc": 0.9220882801111304,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6672088484059857,
+ "eval_f1_macro": 0.8027800298435859,
+ "eval_loss": 0.27437010407447815,
+ "eval_pr_auc": 0.6561534178394561,
+ "eval_precision": 0.6731211027239908,
+ "eval_precision_macro": 0.8049740042228342,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.800640016917419,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.273,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1710
+ },
+ {
+ "epoch": 96.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9223952917907324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8024646720298894,
+ "eval_loss": 0.27396437525749207,
+ "eval_pr_auc": 0.6571939169041311,
+ "eval_precision": 0.6726854891661195,
+ "eval_precision_macro": 0.8046979364973901,
+ "eval_pred_class_0": 16622,
+ "eval_pred_class_1": 3046,
+ "eval_predicted_binding_ratio": 0.1548708562131381,
+ "eval_recall": 0.6607545952918413,
+ "eval_recall_macro": 0.8002873598177079,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.423,
+ "eval_steps_per_second": 3.806,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1728
+ },
+ {
+ "epoch": 97.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9226829844599047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6679655788277318,
+ "eval_f1_macro": 0.8031632457541016,
+ "eval_loss": 0.2735843360424042,
+ "eval_pr_auc": 0.6581443956834908,
+ "eval_precision": 0.6726618705035972,
+ "eval_precision_macro": 0.8049040839573975,
+ "eval_pred_class_0": 16610,
+ "eval_pred_class_1": 3058,
+ "eval_predicted_binding_ratio": 0.15548098434004473,
+ "eval_recall": 0.6633344082554015,
+ "eval_recall_macro": 0.801456544382424,
+ "eval_runtime": 0.2325,
+ "eval_samples_per_second": 701.163,
+ "eval_steps_per_second": 4.302,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1746
+ },
+ {
+ "epoch": 98.0,
+ "eval_accuracy": 0.8961765304047183,
+ "eval_auc": 0.9230679047936586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.668075422626788,
+ "eval_f1_macro": 0.8032698713905396,
+ "eval_loss": 0.2730555534362793,
+ "eval_pr_auc": 0.6597232694357793,
+ "eval_precision": 0.6735496558505408,
+ "eval_precision_macro": 0.8053010360254088,
+ "eval_pred_class_0": 16617,
+ "eval_pred_class_1": 3051,
+ "eval_predicted_binding_ratio": 0.15512507626601588,
+ "eval_recall": 0.6626894550145115,
+ "eval_recall_macro": 0.801284970158309,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.817,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1764
+ },
+ {
+ "epoch": 99.0,
+ "eval_accuracy": 0.8961256863941428,
+ "eval_auc": 0.9233850285396773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6689353427321342,
+ "eval_f1_macro": 0.8036671286252258,
+ "eval_loss": 0.272703617811203,
+ "eval_pr_auc": 0.6608360801532023,
+ "eval_precision": 0.6723127035830619,
+ "eval_precision_macro": 0.8049176483332829,
+ "eval_pred_class_0": 16598,
+ "eval_pred_class_1": 3070,
+ "eval_predicted_binding_ratio": 0.1560911124669514,
+ "eval_recall": 0.6655917445985166,
+ "eval_recall_macro": 0.8024343101576514,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.703,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1782
+ },
+ {
+ "epoch": 100.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9236704438040937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6696891191709845,
+ "eval_f1_macro": 0.804084607836397,
+ "eval_loss": 0.27227067947387695,
+ "eval_pr_auc": 0.661859432748859,
+ "eval_precision": 0.672520325203252,
+ "eval_precision_macro": 0.8051325786806955,
+ "eval_pred_class_0": 16593,
+ "eval_pred_class_1": 3075,
+ "eval_predicted_binding_ratio": 0.15634533251982916,
+ "eval_recall": 0.6668816510802967,
+ "eval_recall_macro": 0.8030490829192756,
+ "eval_runtime": 0.2555,
+ "eval_samples_per_second": 638.071,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1800
+ },
+ {
+ "epoch": 101.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9239191675474416,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6695821185617103,
+ "eval_f1_macro": 0.8040329626642457,
+ "eval_loss": 0.2719270884990692,
+ "eval_pr_auc": 0.6626733311213273,
+ "eval_precision": 0.6726326065733811,
+ "eval_precision_macro": 0.8051623412499325,
+ "eval_pred_class_0": 16595,
+ "eval_pred_class_1": 3073,
+ "eval_predicted_binding_ratio": 0.15624364449867806,
+ "eval_recall": 0.6665591744598517,
+ "eval_recall_macro": 0.802918025088319,
+ "eval_runtime": 0.2736,
+ "eval_samples_per_second": 595.794,
+ "eval_steps_per_second": 3.655,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1818
+ },
+ {
+ "epoch": 102.0,
+ "eval_accuracy": 0.896786658531625,
+ "eval_auc": 0.9242257996595844,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6714146973130463,
+ "eval_f1_macro": 0.805096334723234,
+ "eval_loss": 0.271486759185791,
+ "eval_pr_auc": 0.6637007390734015,
+ "eval_precision": 0.6740331491712708,
+ "eval_precision_macro": 0.8060660592459934,
+ "eval_pred_class_0": 16591,
+ "eval_pred_class_1": 3077,
+ "eval_predicted_binding_ratio": 0.15644702054098028,
+ "eval_recall": 0.6688165108029668,
+ "eval_recall_macro": 0.8041372346976746,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.758,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1836
+ },
+ {
+ "epoch": 103.0,
+ "eval_accuracy": 0.8973459426479561,
+ "eval_auc": 0.9244978230054357,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.673248098397799,
+ "eval_f1_macro": 0.8061779895433214,
+ "eval_loss": 0.2711206376552582,
+ "eval_pr_auc": 0.6646142778873767,
+ "eval_precision": 0.6757634827810266,
+ "eval_precision_macro": 0.8071101922645338,
+ "eval_pred_class_0": 16590,
+ "eval_pred_class_1": 3078,
+ "eval_predicted_binding_ratio": 0.15649786455155582,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.8052555669553397,
+ "eval_runtime": 0.2242,
+ "eval_samples_per_second": 727.107,
+ "eval_steps_per_second": 4.461,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1854
+ },
+ {
+ "epoch": 104.0,
+ "eval_accuracy": 0.8971934106162294,
+ "eval_auc": 0.9248346842593396,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6732385261797027,
+ "eval_f1_macro": 0.8061196854381076,
+ "eval_loss": 0.2707342207431793,
+ "eval_pr_auc": 0.6659244139495173,
+ "eval_precision": 0.6747651441528992,
+ "eval_precision_macro": 0.8066847854532062,
+ "eval_pred_class_0": 16581,
+ "eval_pred_class_1": 3087,
+ "eval_predicted_binding_ratio": 0.15695546064673582,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8055581990104113,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.197,
+ "eval_steps_per_second": 3.921,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1872
+ },
+ {
+ "epoch": 105.0,
+ "eval_accuracy": 0.8975493186902583,
+ "eval_auc": 0.9250955152313902,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6746326497658647,
+ "eval_f1_macro": 0.8069177490147248,
+ "eval_loss": 0.2704195976257324,
+ "eval_pr_auc": 0.666907608407933,
+ "eval_precision": 0.675614489003881,
+ "eval_precision_macro": 0.8072811827258788,
+ "eval_pred_class_0": 16576,
+ "eval_pred_class_1": 3092,
+ "eval_predicted_binding_ratio": 0.15720968069961358,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8065558093510122,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.977,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1890
+ },
+ {
+ "epoch": 106.0,
+ "eval_accuracy": 0.8977018507219849,
+ "eval_auc": 0.9254069649305167,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6749596122778675,
+ "eval_f1_macro": 0.8071292359343842,
+ "eval_loss": 0.26996490359306335,
+ "eval_pr_auc": 0.6681450085536085,
+ "eval_precision": 0.6762706377468436,
+ "eval_precision_macro": 0.8076147808433838,
+ "eval_pred_class_0": 16579,
+ "eval_pred_class_1": 3089,
+ "eval_predicted_binding_ratio": 0.15705714866788692,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8066463507888102,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.086,
+ "eval_steps_per_second": 3.749,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1908
+ },
+ {
+ "epoch": 107.0,
+ "eval_accuracy": 0.8980577587960138,
+ "eval_auc": 0.9256414889578861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6758286176232822,
+ "eval_f1_macro": 0.8076738937412058,
+ "eval_loss": 0.2696084678173065,
+ "eval_pr_auc": 0.6691135839215693,
+ "eval_precision": 0.6776913099870299,
+ "eval_precision_macro": 0.8083644683075526,
+ "eval_pred_class_0": 16584,
+ "eval_pred_class_1": 3084,
+ "eval_predicted_binding_ratio": 0.15680292861500916,
+ "eval_recall": 0.673976136730087,
+ "eval_recall_macro": 0.8069886719746289,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.158,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1926
+ },
+ {
+ "epoch": 108.0,
+ "eval_accuracy": 0.8980069147854383,
+ "eval_auc": 0.9258192627838369,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6769726247987118,
+ "eval_f1_macro": 0.8082079811791663,
+ "eval_loss": 0.26944610476493835,
+ "eval_pr_auc": 0.6696389857739906,
+ "eval_precision": 0.676101640398842,
+ "eval_precision_macro": 0.8078859551713395,
+ "eval_pred_class_0": 16559,
+ "eval_pred_class_1": 3109,
+ "eval_predicted_binding_ratio": 0.158074028879398,
+ "eval_recall": 0.6778458561754273,
+ "eval_recall_macro": 0.8085311854668409,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.512,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1944
+ },
+ {
+ "epoch": 109.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9261110139050743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.676779086654833,
+ "eval_f1_macro": 0.8081683537924276,
+ "eval_loss": 0.26896923780441284,
+ "eval_pr_auc": 0.6708410126864026,
+ "eval_precision": 0.6773255813953488,
+ "eval_precision_macro": 0.8083707318031536,
+ "eval_pred_class_0": 16572,
+ "eval_pred_class_1": 3096,
+ "eval_predicted_binding_ratio": 0.1574130567419158,
+ "eval_recall": 0.6762334730732021,
+ "eval_recall_macro": 0.8079664377498563,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 632.096,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1962
+ },
+ {
+ "epoch": 110.0,
+ "eval_accuracy": 0.8985153548911938,
+ "eval_auc": 0.9263511243868452,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6783757653883339,
+ "eval_f1_macro": 0.809064127789247,
+ "eval_loss": 0.26861709356307983,
+ "eval_pr_auc": 0.6718712574127733,
+ "eval_precision": 0.677938808373591,
+ "eval_precision_macro": 0.808902387342021,
+ "eval_pred_class_0": 16563,
+ "eval_pred_class_1": 3105,
+ "eval_predicted_binding_ratio": 0.15787065283709578,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.8092261637523704,
+ "eval_runtime": 0.2543,
+ "eval_samples_per_second": 640.936,
+ "eval_steps_per_second": 3.932,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1980
+ },
+ {
+ "epoch": 111.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9265832055569767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6794644297467334,
+ "eval_f1_macro": 0.8097506232989936,
+ "eval_loss": 0.2682516574859619,
+ "eval_pr_auc": 0.6727910026400046,
+ "eval_precision": 0.6797934151065204,
+ "eval_precision_macro": 0.8098725675411902,
+ "eval_pred_class_0": 16570,
+ "eval_pred_class_1": 3098,
+ "eval_predicted_binding_ratio": 0.1575147447630669,
+ "eval_recall": 0.6791357626572073,
+ "eval_recall_macro": 0.809628845896721,
+ "eval_runtime": 0.2091,
+ "eval_samples_per_second": 779.644,
+ "eval_steps_per_second": 4.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1998
+ },
+ {
+ "epoch": 111.11111111111111,
+ "grad_norm": 13330.4609375,
+ "learning_rate": 9.552616846852138e-07,
+ "loss": 0.252,
+ "step": 2000
+ },
+ {
+ "epoch": 112.0,
+ "eval_accuracy": 0.899176327028676,
+ "eval_auc": 0.9269119888367457,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6793856103476152,
+ "eval_f1_macro": 0.8097842051315767,
+ "eval_loss": 0.2677896022796631,
+ "eval_pr_auc": 0.6743175064299574,
+ "eval_precision": 0.6812581063553826,
+ "eval_precision_macro": 0.8104795114507255,
+ "eval_pred_class_0": 16584,
+ "eval_pred_class_1": 3084,
+ "eval_predicted_binding_ratio": 0.15680292861500916,
+ "eval_recall": 0.6775233795549823,
+ "eval_recall_macro": 0.8090942786590025,
+ "eval_runtime": 0.2454,
+ "eval_samples_per_second": 664.193,
+ "eval_steps_per_second": 4.075,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2016
+ },
+ {
+ "epoch": 113.0,
+ "eval_accuracy": 0.8994813910921293,
+ "eval_auc": 0.927058521341044,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6812832500403031,
+ "eval_f1_macro": 0.8108073208521016,
+ "eval_loss": 0.26760444045066833,
+ "eval_pr_auc": 0.6746134464200654,
+ "eval_precision": 0.6811734364925854,
+ "eval_precision_macro": 0.8107666047608406,
+ "eval_pred_class_0": 16566,
+ "eval_pred_class_1": 3102,
+ "eval_predicted_binding_ratio": 0.15771812080536912,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.8108480555060766,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.901,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2034
+ },
+ {
+ "epoch": 114.0,
+ "eval_accuracy": 0.8993797030709783,
+ "eval_auc": 0.9272620862892311,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6811664250040277,
+ "eval_f1_macro": 0.81071512110173,
+ "eval_loss": 0.267299622297287,
+ "eval_pr_auc": 0.6753372489001316,
+ "eval_precision": 0.6806181584030908,
+ "eval_precision_macro": 0.8105119532505733,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8109187523785011,
+ "eval_runtime": 0.2924,
+ "eval_samples_per_second": 557.453,
+ "eval_steps_per_second": 3.42,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2052
+ },
+ {
+ "epoch": 115.0,
+ "eval_accuracy": 0.8994813910921293,
+ "eval_auc": 0.9274648142425077,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6817962337035248,
+ "eval_f1_macro": 0.8110548055574955,
+ "eval_loss": 0.2670327126979828,
+ "eval_pr_auc": 0.6759104665767571,
+ "eval_precision": 0.6805912596401028,
+ "eval_precision_macro": 0.8106085073266955,
+ "eval_pred_class_0": 16556,
+ "eval_pred_class_1": 3112,
+ "eval_predicted_binding_ratio": 0.15822656091112466,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.811503344660859,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.841,
+ "eval_steps_per_second": 3.999,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2070
+ },
+ {
+ "epoch": 116.0,
+ "eval_accuracy": 0.8995830791132805,
+ "eval_auc": 0.927707601161492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6824248271426274,
+ "eval_f1_macro": 0.8113938913621764,
+ "eval_loss": 0.2667410373687744,
+ "eval_pr_auc": 0.676645416517246,
+ "eval_precision": 0.6805644644002565,
+ "eval_precision_macro": 0.8107051929252038,
+ "eval_pred_class_0": 16550,
+ "eval_pred_class_1": 3118,
+ "eval_predicted_binding_ratio": 0.15853162497457798,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.812087936943217,
+ "eval_runtime": 0.2676,
+ "eval_samples_per_second": 609.094,
+ "eval_steps_per_second": 3.737,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2088
+ },
+ {
+ "epoch": 117.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.927944061956154,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6828796907714608,
+ "eval_f1_macro": 0.8117208850210732,
+ "eval_loss": 0.26635268330574036,
+ "eval_pr_auc": 0.6777081363329963,
+ "eval_precision": 0.6821106821106822,
+ "eval_precision_macro": 0.8114357758379498,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8120069041569002,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.137,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2106
+ },
+ {
+ "epoch": 118.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9282739839383012,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6831315577078289,
+ "eval_f1_macro": 0.8119498952052617,
+ "eval_loss": 0.2659379541873932,
+ "eval_pr_auc": 0.6790830976589266,
+ "eval_precision": 0.6839043309631545,
+ "eval_precision_macro": 0.8122369488772572,
+ "eval_pred_class_0": 16574,
+ "eval_pred_class_1": 3094,
+ "eval_predicted_binding_ratio": 0.1573113687207647,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8116637557086703,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.745,
+ "eval_steps_per_second": 3.857,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2124
+ },
+ {
+ "epoch": 119.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9284926050623749,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6838460299565148,
+ "eval_f1_macro": 0.812294615183528,
+ "eval_loss": 0.2656570076942444,
+ "eval_pr_auc": 0.6798372835892805,
+ "eval_precision": 0.6830759330759331,
+ "eval_precision_macro": 0.8120089810307202,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.8125811605253657,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.824,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2142
+ },
+ {
+ "epoch": 120.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9287595480437707,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6841512469831054,
+ "eval_f1_macro": 0.8124418563951485,
+ "eval_loss": 0.26531943678855896,
+ "eval_pr_auc": 0.6808986584956077,
+ "eval_precision": 0.682723185613359,
+ "eval_precision_macro": 0.8119125170545953,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.6855852950661077,
+ "eval_recall_macro": 0.8129743340182352,
+ "eval_runtime": 0.2686,
+ "eval_samples_per_second": 606.857,
+ "eval_steps_per_second": 3.723,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2160
+ },
+ {
+ "epoch": 121.0,
+ "eval_accuracy": 0.9005999593247915,
+ "eval_auc": 0.9289936341086871,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6851344822032533,
+ "eval_f1_macro": 0.8130595887334677,
+ "eval_loss": 0.26495063304901123,
+ "eval_pr_auc": 0.6818525990139518,
+ "eval_precision": 0.6843629343629344,
+ "eval_precision_macro": 0.8127732546210807,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6859077716865527,
+ "eval_recall_macro": 0.8133468356833198,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.159,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2178
+ },
+ {
+ "epoch": 122.0,
+ "eval_accuracy": 0.9010067114093959,
+ "eval_auc": 0.9291471740122346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6880307643005928,
+ "eval_f1_macro": 0.8146000626156236,
+ "eval_loss": 0.2648627460002899,
+ "eval_pr_auc": 0.6821768129155847,
+ "eval_precision": 0.6837579617834395,
+ "eval_precision_macro": 0.8130188647252145,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.692357304095453,
+ "eval_recall_macro": 0.8162094361365779,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.373,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2196
+ },
+ {
+ "epoch": 123.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9293803062922532,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.687851067244423,
+ "eval_f1_macro": 0.8145493064661928,
+ "eval_loss": 0.26444998383522034,
+ "eval_pr_auc": 0.6832369783380787,
+ "eval_precision": 0.6846645367412141,
+ "eval_precision_macro": 0.8133686693864494,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8157455657712839,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.052,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2214
+ },
+ {
+ "epoch": 124.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.929644952403895,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6885774125986789,
+ "eval_f1_macro": 0.815114870687036,
+ "eval_loss": 0.26401567459106445,
+ "eval_pr_auc": 0.6842971435384069,
+ "eval_precision": 0.6880231809401159,
+ "eval_precision_macro": 0.8149088251035563,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8153213845367371,
+ "eval_runtime": 0.2233,
+ "eval_samples_per_second": 729.933,
+ "eval_steps_per_second": 4.478,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2232
+ },
+ {
+ "epoch": 125.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.9298876614628875,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6892782510850346,
+ "eval_f1_macro": 0.8154529561328843,
+ "eval_loss": 0.26374292373657227,
+ "eval_pr_auc": 0.6852698748697685,
+ "eval_precision": 0.6871794871794872,
+ "eval_precision_macro": 0.8146738625165021,
+ "eval_pred_class_0": 16548,
+ "eval_pred_class_1": 3120,
+ "eval_predicted_binding_ratio": 0.1586333129957291,
+ "eval_recall": 0.691389874234118,
+ "eval_recall_macro": 0.8162387893534325,
+ "eval_runtime": 0.2352,
+ "eval_samples_per_second": 692.981,
+ "eval_steps_per_second": 4.251,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2250
+ },
+ {
+ "epoch": 126.0,
+ "eval_accuracy": 0.9020235916209071,
+ "eval_auc": 0.9300761410376911,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909382518043304,
+ "eval_f1_macro": 0.8163612439650636,
+ "eval_loss": 0.2635449767112732,
+ "eval_pr_auc": 0.685931037905785,
+ "eval_precision": 0.6873005743458839,
+ "eval_precision_macro": 0.815012329026093,
+ "eval_pred_class_0": 16534,
+ "eval_pred_class_1": 3134,
+ "eval_predicted_binding_ratio": 0.15934512914378687,
+ "eval_recall": 0.6946146404385682,
+ "eval_recall_macro": 0.8177304505385936,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.44,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2268
+ },
+ {
+ "epoch": 127.0,
+ "eval_accuracy": 0.902837095790116,
+ "eval_auc": 0.9303049910181687,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6925181013676589,
+ "eval_f1_macro": 0.8174102840403102,
+ "eval_loss": 0.263118714094162,
+ "eval_pr_auc": 0.6869422132706717,
+ "eval_precision": 0.691072575465639,
+ "eval_precision_macro": 0.8168725206674576,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.6939696871976782,
+ "eval_recall_macro": 0.8179512225449368,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 630.976,
+ "eval_steps_per_second": 3.871,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2286
+ },
+ {
+ "epoch": 128.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9304756990498765,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6929866795056974,
+ "eval_f1_macro": 0.8176004232749752,
+ "eval_loss": 0.2629205286502838,
+ "eval_pr_auc": 0.6875064110834537,
+ "eval_precision": 0.689776357827476,
+ "eval_precision_macro": 0.8164083143593783,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6962270235407932,
+ "eval_recall_macro": 0.8188082664031002,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.934,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2304
+ },
+ {
+ "epoch": 129.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9306633221647718,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6937530110807772,
+ "eval_f1_macro": 0.818077689508494,
+ "eval_loss": 0.2625824213027954,
+ "eval_pr_auc": 0.6881187823465719,
+ "eval_precision": 0.690978886756238,
+ "eval_precision_macro": 0.8170466915947796,
+ "eval_pred_class_0": 16542,
+ "eval_pred_class_1": 3126,
+ "eval_predicted_binding_ratio": 0.15893837705918243,
+ "eval_recall": 0.6965495001612383,
+ "eval_recall_macro": 0.8191204071096527,
+ "eval_runtime": 0.2718,
+ "eval_samples_per_second": 599.717,
+ "eval_steps_per_second": 3.679,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2322
+ },
+ {
+ "epoch": 130.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9308322783466673,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933333333333334,
+ "eval_f1_macro": 0.817839388722781,
+ "eval_loss": 0.2622954547405243,
+ "eval_pr_auc": 0.6887035254510873,
+ "eval_precision": 0.6907810499359796,
+ "eval_precision_macro": 0.816890766747487,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.6959045469203483,
+ "eval_recall_macro": 0.8187979304892078,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.881,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2340
+ },
+ {
+ "epoch": 131.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9309924460820045,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6929866795056974,
+ "eval_f1_macro": 0.8176004232749752,
+ "eval_loss": 0.26203182339668274,
+ "eval_pr_auc": 0.6893090005690568,
+ "eval_precision": 0.689776357827476,
+ "eval_precision_macro": 0.8164083143593783,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6962270235407932,
+ "eval_recall_macro": 0.8188082664031002,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.443,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2358
+ },
+ {
+ "epoch": 132.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9312000694822565,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6936286310383566,
+ "eval_f1_macro": 0.817981812876073,
+ "eval_loss": 0.26177045702934265,
+ "eval_pr_auc": 0.6902838377634022,
+ "eval_precision": 0.6904153354632588,
+ "eval_precision_macro": 0.8167882699809945,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6968719767816833,
+ "eval_recall_macro": 0.8191911039820772,
+ "eval_runtime": 0.2756,
+ "eval_samples_per_second": 591.34,
+ "eval_steps_per_second": 3.628,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2376
+ },
+ {
+ "epoch": 133.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9314264084780033,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933333333333334,
+ "eval_f1_macro": 0.817839388722781,
+ "eval_loss": 0.2614164650440216,
+ "eval_pr_auc": 0.6912123921690412,
+ "eval_precision": 0.6907810499359796,
+ "eval_precision_macro": 0.816890766747487,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.6959045469203483,
+ "eval_recall_macro": 0.8187979304892078,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.769,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2394
+ },
+ {
+ "epoch": 134.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9316330780933575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6934576434656807,
+ "eval_f1_macro": 0.8179352236654993,
+ "eval_loss": 0.26109954714775085,
+ "eval_pr_auc": 0.6919150376493911,
+ "eval_precision": 0.6913461538461538,
+ "eval_precision_macro": 0.817149992562429,
+ "eval_pred_class_0": 16548,
+ "eval_pred_class_1": 3120,
+ "eval_predicted_binding_ratio": 0.1586333129957291,
+ "eval_recall": 0.6955820702999033,
+ "eval_recall_macro": 0.8187272336167832,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.557,
+ "eval_steps_per_second": 3.844,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2412
+ },
+ {
+ "epoch": 135.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9318176062735843,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.694733472066592,
+ "eval_f1_macro": 0.8185505131193367,
+ "eval_loss": 0.2609698474407196,
+ "eval_pr_auc": 0.69244594350898,
+ "eval_precision": 0.6897647806738716,
+ "eval_precision_macro": 0.8167078351983328,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.6997742663656885,
+ "eval_recall_macro": 0.8204309854192178,
+ "eval_runtime": 0.2491,
+ "eval_samples_per_second": 654.333,
+ "eval_steps_per_second": 4.014,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2430
+ },
+ {
+ "epoch": 136.0,
+ "eval_accuracy": 0.9034472239170226,
+ "eval_auc": 0.9320372006475538,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6941536479304236,
+ "eval_f1_macro": 0.8184144035830462,
+ "eval_loss": 0.2604828178882599,
+ "eval_pr_auc": 0.6935318303087233,
+ "eval_precision": 0.6933719433719434,
+ "eval_precision_macro": 0.8181231697536046,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6949371170590132,
+ "eval_recall_macro": 0.8187065617889984,
+ "eval_runtime": 0.2485,
+ "eval_samples_per_second": 655.847,
+ "eval_steps_per_second": 4.024,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2448
+ },
+ {
+ "epoch": 137.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9321443165310757,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6960972488803583,
+ "eval_f1_macro": 0.8193338378363828,
+ "eval_loss": 0.2604370415210724,
+ "eval_pr_auc": 0.693643099537468,
+ "eval_precision": 0.6905744208187877,
+ "eval_precision_macro": 0.8172857573610195,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.8214285957598189,
+ "eval_runtime": 0.2215,
+ "eval_samples_per_second": 736.024,
+ "eval_steps_per_second": 4.515,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2466
+ },
+ {
+ "epoch": 138.0,
+ "eval_accuracy": 0.9034980679275981,
+ "eval_auc": 0.9323432098797633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695638229634381,
+ "eval_f1_macro": 0.8191484199531422,
+ "eval_loss": 0.2601032257080078,
+ "eval_pr_auc": 0.694473981068256,
+ "eval_precision": 0.691866028708134,
+ "eval_precision_macro": 0.817746962215919,
+ "eval_pred_class_0": 16533,
+ "eval_pred_class_1": 3135,
+ "eval_predicted_binding_ratio": 0.1593959731543624,
+ "eval_recall": 0.6994517897452435,
+ "eval_recall_macro": 0.8205715519016554,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.928,
+ "eval_steps_per_second": 3.858,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2484
+ },
+ {
+ "epoch": 138.88888888888889,
+ "grad_norm": 12954.3583984375,
+ "learning_rate": 9.068887706579789e-07,
+ "loss": 0.2385,
+ "step": 2500
+ },
+ {
+ "epoch": 139.0,
+ "eval_accuracy": 0.904311572096807,
+ "eval_auc": 0.9325454122780963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6969404186795491,
+ "eval_f1_macro": 0.8200635197304043,
+ "eval_loss": 0.25969693064689636,
+ "eval_pr_auc": 0.6954050242581626,
+ "eval_precision": 0.6960437439691219,
+ "eval_precision_macro": 0.819729100681946,
+ "eval_pred_class_0": 16559,
+ "eval_pred_class_1": 3109,
+ "eval_predicted_binding_ratio": 0.158074028879398,
+ "eval_recall": 0.6978394066430184,
+ "eval_recall_macro": 0.820399150415129,
+ "eval_runtime": 0.2657,
+ "eval_samples_per_second": 613.439,
+ "eval_steps_per_second": 3.763,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2502
+ },
+ {
+ "epoch": 140.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9326487714170208,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971977582065653,
+ "eval_f1_macro": 0.8200261554019741,
+ "eval_loss": 0.2596379518508911,
+ "eval_pr_auc": 0.6956168134976223,
+ "eval_precision": 0.6924300254452926,
+ "eval_precision_macro": 0.8182556808417458,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8218312779041694,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 626.028,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2520
+ },
+ {
+ "epoch": 141.0,
+ "eval_accuracy": 0.904311572096807,
+ "eval_auc": 0.9329799294265357,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6962556488056811,
+ "eval_f1_macro": 0.819733135205496,
+ "eval_loss": 0.25904589891433716,
+ "eval_pr_auc": 0.6970902220334548,
+ "eval_precision": 0.6969305331179322,
+ "eval_precision_macro": 0.8199852086334245,
+ "eval_pred_class_0": 16573,
+ "eval_pred_class_1": 3095,
+ "eval_predicted_binding_ratio": 0.15736221273134024,
+ "eval_recall": 0.6955820702999033,
+ "eval_recall_macro": 0.8194817455984336,
+ "eval_runtime": 0.2524,
+ "eval_samples_per_second": 645.771,
+ "eval_steps_per_second": 3.962,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2538
+ },
+ {
+ "epoch": 142.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9330630060376336,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994061948322902,
+ "eval_f1_macro": 0.8214143192859533,
+ "eval_loss": 0.2590080201625824,
+ "eval_pr_auc": 0.697179333543334,
+ "eval_precision": 0.6961661341853035,
+ "eval_precision_macro": 0.8202078705755396,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.7026765559496937,
+ "eval_recall_macro": 0.8226366421928706,
+ "eval_runtime": 0.2495,
+ "eval_samples_per_second": 653.345,
+ "eval_steps_per_second": 4.008,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2556
+ },
+ {
+ "epoch": 143.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9332962551076398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991164658634538,
+ "eval_f1_macro": 0.8212745809731632,
+ "eval_loss": 0.25865858793258667,
+ "eval_pr_auc": 0.6981950669404262,
+ "eval_precision": 0.6965428937259923,
+ "eval_precision_macro": 0.8203156925109651,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.8222434687000011,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 619.149,
+ "eval_steps_per_second": 3.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2574
+ },
+ {
+ "epoch": 144.0,
+ "eval_accuracy": 0.9050233882448647,
+ "eval_auc": 0.9334749729859892,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995818591186876,
+ "eval_f1_macro": 0.8215887434369935,
+ "eval_loss": 0.2584296464920044,
+ "eval_pr_auc": 0.6986932637956595,
+ "eval_precision": 0.697786333012512,
+ "eval_precision_macro": 0.8209190259709409,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8222633132653747,
+ "eval_runtime": 0.2769,
+ "eval_samples_per_second": 588.703,
+ "eval_steps_per_second": 3.612,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2592
+ },
+ {
+ "epoch": 145.0,
+ "eval_accuracy": 0.9050742322554403,
+ "eval_auc": 0.9336868786857826,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991136180499598,
+ "eval_f1_macro": 0.8213807805320277,
+ "eval_loss": 0.25803840160369873,
+ "eval_pr_auc": 0.699660351667112,
+ "eval_precision": 0.6987757731958762,
+ "eval_precision_macro": 0.8212545854629465,
+ "eval_pred_class_0": 16564,
+ "eval_pred_class_1": 3104,
+ "eval_predicted_binding_ratio": 0.15781980882652025,
+ "eval_recall": 0.6994517897452435,
+ "eval_recall_macro": 0.8215071467589017,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.163,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2610
+ },
+ {
+ "epoch": 146.0,
+ "eval_accuracy": 0.9047183241814114,
+ "eval_auc": 0.9337996783486955,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995831997435076,
+ "eval_f1_macro": 0.8214817322060338,
+ "eval_loss": 0.2579362094402313,
+ "eval_pr_auc": 0.6999214829412527,
+ "eval_precision": 0.6955690149824674,
+ "eval_precision_macro": 0.8199882459220607,
+ "eval_pred_class_0": 16531,
+ "eval_pred_class_1": 3137,
+ "eval_predicted_binding_ratio": 0.15949766117551353,
+ "eval_recall": 0.7036439858110287,
+ "eval_recall_macro": 0.8229996352064741,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.317,
+ "eval_steps_per_second": 3.941,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2628
+ },
+ {
+ "epoch": 147.0,
+ "eval_accuracy": 0.9051759202765914,
+ "eval_auc": 0.9339161666287131,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.701647736362182,
+ "eval_f1_macro": 0.8226388900943448,
+ "eval_loss": 0.2578524649143219,
+ "eval_pr_auc": 0.7001081914566338,
+ "eval_precision": 0.6961904761904761,
+ "eval_precision_macro": 0.820610070399391,
+ "eval_pred_class_0": 16518,
+ "eval_pred_class_1": 3150,
+ "eval_predicted_binding_ratio": 0.16015863331299574,
+ "eval_recall": 0.7071912286359239,
+ "eval_recall_macro": 0.8247128956603896,
+ "eval_runtime": 0.2529,
+ "eval_samples_per_second": 644.43,
+ "eval_steps_per_second": 3.954,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2646
+ },
+ {
+ "epoch": 148.0,
+ "eval_accuracy": 0.9050233882448647,
+ "eval_auc": 0.9341703697689739,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995818591186876,
+ "eval_f1_macro": 0.8215887434369935,
+ "eval_loss": 0.25731131434440613,
+ "eval_pr_auc": 0.7012902373057504,
+ "eval_precision": 0.697786333012512,
+ "eval_precision_macro": 0.8209190259709409,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8222633132653747,
+ "eval_runtime": 0.2512,
+ "eval_samples_per_second": 648.858,
+ "eval_steps_per_second": 3.981,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2664
+ },
+ {
+ "epoch": 149.0,
+ "eval_accuracy": 0.9052776082977425,
+ "eval_auc": 0.9343408636857047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003377834968635,
+ "eval_f1_macro": 0.8220430425380088,
+ "eval_loss": 0.25706911087036133,
+ "eval_pr_auc": 0.7018656885391451,
+ "eval_precision": 0.6986521181001284,
+ "eval_precision_macro": 0.8214140242506442,
+ "eval_pred_class_0": 16552,
+ "eval_pred_class_1": 3116,
+ "eval_predicted_binding_ratio": 0.1584299369534269,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8226763313236177,
+ "eval_runtime": 0.2513,
+ "eval_samples_per_second": 648.672,
+ "eval_steps_per_second": 3.98,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2682
+ },
+ {
+ "epoch": 150.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9345392996069414,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7006758931445124,
+ "eval_f1_macro": 0.822259931959612,
+ "eval_loss": 0.2567782402038574,
+ "eval_pr_auc": 0.7026441101697649,
+ "eval_precision": 0.6993254095727593,
+ "eval_precision_macro": 0.8217557280421937,
+ "eval_pred_class_0": 16555,
+ "eval_pred_class_1": 3113,
+ "eval_predicted_binding_ratio": 0.15827740492170023,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8227668727614157,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.188,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2700
+ },
+ {
+ "epoch": 151.0,
+ "eval_accuracy": 0.9051250762660159,
+ "eval_auc": 0.934685296823797,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003853564547207,
+ "eval_f1_macro": 0.8220121780461352,
+ "eval_loss": 0.25664329528808594,
+ "eval_pr_auc": 0.7029644506395569,
+ "eval_precision": 0.6974736168851935,
+ "eval_precision_macro": 0.8209271234175075,
+ "eval_pred_class_0": 16541,
+ "eval_pred_class_1": 3127,
+ "eval_predicted_binding_ratio": 0.158989221069758,
+ "eval_recall": 0.7033215091905837,
+ "eval_recall_macro": 0.8231100212096456,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.856,
+ "eval_steps_per_second": 4.152,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2718
+ },
+ {
+ "epoch": 152.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9349076649599691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008216529724505,
+ "eval_f1_macro": 0.8223840221758023,
+ "eval_loss": 0.25626465678215027,
+ "eval_pr_auc": 0.7040942732775656,
+ "eval_precision": 0.7002575660012879,
+ "eval_precision_macro": 0.82217322207805,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8225952985373008,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.699,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2736
+ },
+ {
+ "epoch": 153.0,
+ "eval_accuracy": 0.905226764287167,
+ "eval_auc": 0.9350594919437002,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7014734144778988,
+ "eval_f1_macro": 0.8225728005545544,
+ "eval_loss": 0.2562600076198578,
+ "eval_pr_auc": 0.7046022469135804,
+ "eval_precision": 0.6967865097041044,
+ "eval_precision_macro": 0.8208289583316286,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7062237987745889,
+ "eval_recall_macro": 0.8243499026467862,
+ "eval_runtime": 0.2358,
+ "eval_samples_per_second": 691.279,
+ "eval_steps_per_second": 4.241,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2754
+ },
+ {
+ "epoch": 154.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9352728672508359,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7010773436243769,
+ "eval_f1_macro": 0.8224715159707777,
+ "eval_loss": 0.25581732392311096,
+ "eval_pr_auc": 0.7056629719926804,
+ "eval_precision": 0.6991661321359846,
+ "eval_precision_macro": 0.821758292654095,
+ "eval_pred_class_0": 16550,
+ "eval_pred_class_1": 3118,
+ "eval_predicted_binding_ratio": 0.15853162497457798,
+ "eval_recall": 0.7029990325701386,
+ "eval_recall_macro": 0.8231902267335512,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.5,
+ "eval_steps_per_second": 3.844,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2772
+ },
+ {
+ "epoch": 155.0,
+ "eval_accuracy": 0.9057860484034981,
+ "eval_auc": 0.9354356530283926,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7025204687750842,
+ "eval_f1_macro": 0.8232752161134611,
+ "eval_loss": 0.25560733675956726,
+ "eval_pr_auc": 0.7062858637533224,
+ "eval_precision": 0.6994884910485933,
+ "eval_precision_macro": 0.8221444873622652,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.7055788455336988,
+ "eval_recall_macro": 0.8244197722567993,
+ "eval_runtime": 0.2671,
+ "eval_samples_per_second": 610.27,
+ "eval_steps_per_second": 3.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2790
+ },
+ {
+ "epoch": 156.0,
+ "eval_accuracy": 0.9058877364246491,
+ "eval_auc": 0.9356189159837551,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7028415475999358,
+ "eval_f1_macro": 0.8234659606184656,
+ "eval_loss": 0.2553412616252899,
+ "eval_pr_auc": 0.7070636650718337,
+ "eval_precision": 0.6998081841432225,
+ "eval_precision_macro": 0.8223345636556499,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.7059013221541438,
+ "eval_recall_macro": 0.8246111910462879,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 642.017,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2808
+ },
+ {
+ "epoch": 157.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9357918624902231,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7031199742682535,
+ "eval_f1_macro": 0.8236899466727462,
+ "eval_loss": 0.25509998202323914,
+ "eval_pr_auc": 0.7077594222055618,
+ "eval_precision": 0.7013153673403913,
+ "eval_precision_macro": 0.8230158493399438,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7049338922928088,
+ "eval_recall_macro": 0.8243689199497484,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.309,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2826
+ },
+ {
+ "epoch": 158.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9359727409833409,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7034593724859212,
+ "eval_f1_macro": 0.8239074586532139,
+ "eval_loss": 0.2548539340496063,
+ "eval_pr_auc": 0.7084539339673516,
+ "eval_precision": 0.7019910083493899,
+ "eval_precision_macro": 0.8233586792381238,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.7049338922928088,
+ "eval_recall_macro": 0.8244594613875464,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.242,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2844
+ },
+ {
+ "epoch": 159.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9360812778117108,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7043380822794941,
+ "eval_f1_macro": 0.8242594639388645,
+ "eval_loss": 0.2548294961452484,
+ "eval_pr_auc": 0.708809734771939,
+ "eval_precision": 0.6993006993006993,
+ "eval_precision_macro": 0.822383674913635,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.709448564979039,
+ "eval_recall_macro": 0.8261735491038733,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.758,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2862
+ },
+ {
+ "epoch": 160.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9362697476540152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7040308334671591,
+ "eval_f1_macro": 0.82418310527748,
+ "eval_loss": 0.25451889634132385,
+ "eval_pr_auc": 0.70979034812957,
+ "eval_precision": 0.7012156110044786,
+ "eval_precision_macro": 0.8231322886360805,
+ "eval_pred_class_0": 16542,
+ "eval_pred_class_1": 3126,
+ "eval_predicted_binding_ratio": 0.15893837705918243,
+ "eval_recall": 0.7068687520154788,
+ "eval_recall_macro": 0.8252458083732854,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.302,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2880
+ },
+ {
+ "epoch": 161.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9364340127714132,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7047863796980405,
+ "eval_f1_macro": 0.824637224883753,
+ "eval_loss": 0.25425252318382263,
+ "eval_pr_auc": 0.7104452483887299,
+ "eval_precision": 0.70208,
+ "eval_precision_macro": 0.8236265925164723,
+ "eval_pred_class_0": 16543,
+ "eval_pred_class_1": 3125,
+ "eval_predicted_binding_ratio": 0.15888753304860687,
+ "eval_recall": 0.7075137052563689,
+ "eval_recall_macro": 0.8256588264315284,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.767,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2898
+ },
+ {
+ "epoch": 162.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9366581522223957,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7045965927354548,
+ "eval_f1_macro": 0.8245456841795291,
+ "eval_loss": 0.25392982363700867,
+ "eval_pr_auc": 0.7114160919166963,
+ "eval_precision": 0.7023389939122077,
+ "eval_precision_macro": 0.8237022823552698,
+ "eval_pred_class_0": 16547,
+ "eval_pred_class_1": 3121,
+ "eval_predicted_binding_ratio": 0.15868415700630464,
+ "eval_recall": 0.7068687520154788,
+ "eval_recall_macro": 0.8253967107696154,
+ "eval_runtime": 0.2487,
+ "eval_samples_per_second": 655.473,
+ "eval_steps_per_second": 4.021,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2916
+ },
+ {
+ "epoch": 163.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.936698911928028,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7069544364508393,
+ "eval_f1_macro": 0.8257724934589374,
+ "eval_loss": 0.25399070978164673,
+ "eval_pr_auc": 0.7113652786898896,
+ "eval_precision": 0.7010145846544071,
+ "eval_precision_macro": 0.8235604593370134,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7129958078039342,
+ "eval_recall_macro": 0.8280377119541189,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.019,
+ "eval_steps_per_second": 3.969,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2934
+ },
+ {
+ "epoch": 164.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9369248810888143,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7062780269058296,
+ "eval_f1_macro": 0.8254283885284618,
+ "eval_loss": 0.25362086296081543,
+ "eval_pr_auc": 0.7123203296069245,
+ "eval_precision": 0.7015590200445434,
+ "eval_precision_macro": 0.8236690712930735,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7110609480812641,
+ "eval_recall_macro": 0.827221184489114,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.444,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2952
+ },
+ {
+ "epoch": 165.0,
+ "eval_accuracy": 0.9072096806996136,
+ "eval_auc": 0.9371562809840187,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7070155723230053,
+ "eval_f1_macro": 0.8259456391835222,
+ "eval_loss": 0.2532632350921631,
+ "eval_pr_auc": 0.7133697274093473,
+ "eval_precision": 0.7039641943734015,
+ "eval_precision_macro": 0.8248055554696512,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.710093518219929,
+ "eval_recall_macro": 0.8270996353096386,
+ "eval_runtime": 0.2545,
+ "eval_samples_per_second": 640.417,
+ "eval_steps_per_second": 3.929,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2970
+ },
+ {
+ "epoch": 166.0,
+ "eval_accuracy": 0.9071588366890381,
+ "eval_auc": 0.9372476886142239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7072779737095223,
+ "eval_f1_macro": 0.8260542385315996,
+ "eval_loss": 0.2531469762325287,
+ "eval_pr_auc": 0.7136781835058345,
+ "eval_precision": 0.7032196365954734,
+ "eval_precision_macro": 0.8245394656269969,
+ "eval_pred_class_0": 16531,
+ "eval_pred_class_1": 3137,
+ "eval_predicted_binding_ratio": 0.15949766117551353,
+ "eval_recall": 0.7113834247017091,
+ "eval_recall_macro": 0.8275936861541986,
+ "eval_runtime": 0.2653,
+ "eval_samples_per_second": 614.378,
+ "eval_steps_per_second": 3.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2988
+ },
+ {
+ "epoch": 166.66666666666666,
+ "grad_norm": 14056.4111328125,
+ "learning_rate": 8.432618494003656e-07,
+ "loss": 0.2279,
+ "step": 3000
+ },
+ {
+ "epoch": 167.0,
+ "eval_accuracy": 0.9072605247101891,
+ "eval_auc": 0.9373718655684177,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7077859660365268,
+ "eval_f1_macro": 0.8263351175441593,
+ "eval_loss": 0.25298377871513367,
+ "eval_pr_auc": 0.7140728736694715,
+ "eval_precision": 0.7032792104425343,
+ "eval_precision_macro": 0.8246534613355044,
+ "eval_pred_class_0": 16527,
+ "eval_pred_class_1": 3141,
+ "eval_predicted_binding_ratio": 0.15970103721781573,
+ "eval_recall": 0.7123508545630441,
+ "eval_recall_macro": 0.8280472206056,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.943,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3006
+ },
+ {
+ "epoch": 168.0,
+ "eval_accuracy": 0.9075655887736425,
+ "eval_auc": 0.9374749229998747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7094918504314478,
+ "eval_f1_macro": 0.8272654245808608,
+ "eval_loss": 0.25285741686820984,
+ "eval_pr_auc": 0.7143683695359583,
+ "eval_precision": 0.7031992397846056,
+ "eval_precision_macro": 0.8249204363177162,
+ "eval_pred_class_0": 16511,
+ "eval_pred_class_1": 3157,
+ "eval_predicted_binding_ratio": 0.16051454138702462,
+ "eval_recall": 0.7158980973879394,
+ "eval_recall_macro": 0.8296699396217176,
+ "eval_runtime": 0.2383,
+ "eval_samples_per_second": 683.908,
+ "eval_steps_per_second": 4.196,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3024
+ },
+ {
+ "epoch": 169.0,
+ "eval_accuracy": 0.9076672767947935,
+ "eval_auc": 0.9377269168628721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7087876844130853,
+ "eval_f1_macro": 0.8269618180373584,
+ "eval_loss": 0.25237372517585754,
+ "eval_pr_auc": 0.7154735567799367,
+ "eval_precision": 0.7049441786283892,
+ "eval_precision_macro": 0.8255259815297635,
+ "eval_pred_class_0": 16533,
+ "eval_pred_class_1": 3135,
+ "eval_predicted_binding_ratio": 0.1593959731543624,
+ "eval_recall": 0.7126733311834892,
+ "eval_recall_macro": 0.8284197222706846,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.364,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3042
+ },
+ {
+ "epoch": 170.0,
+ "eval_accuracy": 0.9079214968476713,
+ "eval_auc": 0.9378333125414714,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7099151049175076,
+ "eval_f1_macro": 0.8275952704051472,
+ "eval_loss": 0.2522483766078949,
+ "eval_pr_auc": 0.7157732693940176,
+ "eval_precision": 0.7052832590706556,
+ "eval_precision_macro": 0.8258656401852128,
+ "eval_pred_class_0": 16526,
+ "eval_pred_class_1": 3142,
+ "eval_predicted_binding_ratio": 0.1597518812283913,
+ "eval_recall": 0.7146081909061593,
+ "eval_recall_macro": 0.8293569716527538,
+ "eval_runtime": 0.2212,
+ "eval_samples_per_second": 736.855,
+ "eval_steps_per_second": 4.521,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3060
+ },
+ {
+ "epoch": 171.0,
+ "eval_accuracy": 0.9079214968476713,
+ "eval_auc": 0.9379969158489405,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7098221438871976,
+ "eval_f1_macro": 0.8275504434498686,
+ "eval_loss": 0.25200438499450684,
+ "eval_pr_auc": 0.7163721445757975,
+ "eval_precision": 0.7054140127388535,
+ "eval_precision_macro": 0.8259040054013725,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7142857142857143,
+ "eval_recall_macro": 0.8292259138217972,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.089,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3078
+ },
+ {
+ "epoch": 172.0,
+ "eval_accuracy": 0.9083282489322758,
+ "eval_auc": 0.9382153715205318,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7102683593122289,
+ "eval_f1_macro": 0.8279092226905722,
+ "eval_loss": 0.25164341926574707,
+ "eval_pr_auc": 0.7173810220120935,
+ "eval_precision": 0.7078795643818065,
+ "eval_precision_macro": 0.8270148456503497,
+ "eval_pred_class_0": 16546,
+ "eval_pred_class_1": 3122,
+ "eval_predicted_binding_ratio": 0.1587350010168802,
+ "eval_recall": 0.7126733311834892,
+ "eval_recall_macro": 0.8288120685011429,
+ "eval_runtime": 0.2387,
+ "eval_samples_per_second": 682.942,
+ "eval_steps_per_second": 4.19,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3096
+ },
+ {
+ "epoch": 173.0,
+ "eval_accuracy": 0.9084807809640024,
+ "eval_auc": 0.9383315970230777,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.711168164313222,
+ "eval_f1_macro": 0.8283970352740591,
+ "eval_loss": 0.2515103816986084,
+ "eval_pr_auc": 0.7177908159595219,
+ "eval_precision": 0.7077610986905142,
+ "eval_precision_macro": 0.8271223707155178,
+ "eval_pred_class_0": 16537,
+ "eval_pred_class_1": 3131,
+ "eval_predicted_binding_ratio": 0.15919259711206019,
+ "eval_recall": 0.7146081909061593,
+ "eval_recall_macro": 0.8296889569246799,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.31,
+ "eval_steps_per_second": 3.824,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3114
+ },
+ {
+ "epoch": 174.0,
+ "eval_accuracy": 0.9088366890380313,
+ "eval_auc": 0.9384487568455233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7133493205435651,
+ "eval_f1_macro": 0.8295745121505045,
+ "eval_loss": 0.25142860412597656,
+ "eval_pr_auc": 0.7181535155231535,
+ "eval_precision": 0.7073557387444515,
+ "eval_precision_macro": 0.8273365831908039,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7194453402128346,
+ "eval_recall_macro": 0.8318660877438894,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.694,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3132
+ },
+ {
+ "epoch": 175.0,
+ "eval_accuracy": 0.9090400650803335,
+ "eval_auc": 0.9386484385266265,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7135308246597278,
+ "eval_f1_macro": 0.8297338931856857,
+ "eval_loss": 0.2510823905467987,
+ "eval_pr_auc": 0.7190368898285651,
+ "eval_precision": 0.7086513994910941,
+ "eval_precision_macro": 0.8279095777411898,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7184779103514995,
+ "eval_recall_macro": 0.8315936361680839,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 616.851,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3150
+ },
+ {
+ "epoch": 176.0,
+ "eval_accuracy": 0.9090909090909091,
+ "eval_auc": 0.9388042461024309,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7136450992953235,
+ "eval_f1_macro": 0.8298069567551197,
+ "eval_loss": 0.25084683299064636,
+ "eval_pr_auc": 0.7196986972872739,
+ "eval_precision": 0.7088768692332167,
+ "eval_precision_macro": 0.8280239111672891,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7184779103514995,
+ "eval_recall_macro": 0.83162381664735,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.711,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3168
+ },
+ {
+ "epoch": 177.0,
+ "eval_accuracy": 0.9093451291437868,
+ "eval_auc": 0.9389100189010969,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7144001281435207,
+ "eval_f1_macro": 0.8302608322100373,
+ "eval_loss": 0.25058600306510925,
+ "eval_pr_auc": 0.7201232901620662,
+ "eval_precision": 0.7097390197326544,
+ "eval_precision_macro": 0.8285170954889824,
+ "eval_pred_class_0": 16526,
+ "eval_pred_class_1": 3142,
+ "eval_predicted_binding_ratio": 0.1597518812283913,
+ "eval_recall": 0.7191228635923895,
+ "eval_recall_macro": 0.832036834705593,
+ "eval_runtime": 0.2429,
+ "eval_samples_per_second": 671.088,
+ "eval_steps_per_second": 4.117,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3186
+ },
+ {
+ "epoch": 178.0,
+ "eval_accuracy": 0.9091417531014846,
+ "eval_auc": 0.9390935349014322,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.713392141138733,
+ "eval_f1_macro": 0.8297029283682245,
+ "eval_loss": 0.2502758800983429,
+ "eval_pr_auc": 0.7210120001490467,
+ "eval_precision": 0.7096362476068921,
+ "eval_precision_macro": 0.8282970157836081,
+ "eval_pred_class_0": 16534,
+ "eval_pred_class_1": 3134,
+ "eval_predicted_binding_ratio": 0.15934512914378687,
+ "eval_recall": 0.7171880038697195,
+ "eval_recall_macro": 0.83112976580279,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.763,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3204
+ },
+ {
+ "epoch": 179.0,
+ "eval_accuracy": 0.9097518812283913,
+ "eval_auc": 0.939160767004228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7162270183852918,
+ "eval_f1_macro": 0.8312854205617097,
+ "eval_loss": 0.2502012550830841,
+ "eval_pr_auc": 0.7211443774602971,
+ "eval_precision": 0.7102092580849715,
+ "eval_precision_macro": 0.8290358389250096,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7223476297968398,
+ "eval_recall_macro": 0.8335888568492861,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.383,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3222
+ },
+ {
+ "epoch": 180.0,
+ "eval_accuracy": 0.9099552572706935,
+ "eval_auc": 0.9392979952395233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7171378374061651,
+ "eval_f1_macro": 0.8317964319305957,
+ "eval_loss": 0.2500424385070801,
+ "eval_pr_auc": 0.7216701546304439,
+ "eval_precision": 0.7104430379746836,
+ "eval_precision_macro": 0.8292946956289701,
+ "eval_pred_class_0": 16508,
+ "eval_pred_class_1": 3160,
+ "eval_predicted_binding_ratio": 0.16066707341875128,
+ "eval_recall": 0.7239600128990649,
+ "eval_recall_macro": 0.8343648679211326,
+ "eval_runtime": 0.2706,
+ "eval_samples_per_second": 602.263,
+ "eval_steps_per_second": 3.695,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3240
+ },
+ {
+ "epoch": 181.0,
+ "eval_accuracy": 0.910006101281269,
+ "eval_auc": 0.9394494621207929,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7173427020121367,
+ "eval_f1_macro": 0.8319131723763289,
+ "eval_loss": 0.24980410933494568,
+ "eval_pr_auc": 0.722382187322872,
+ "eval_precision": 0.7105346409364125,
+ "eval_precision_macro": 0.8293692166334694,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7242824895195098,
+ "eval_recall_macro": 0.8345261062313551,
+ "eval_runtime": 0.2696,
+ "eval_samples_per_second": 604.516,
+ "eval_steps_per_second": 3.709,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3258
+ },
+ {
+ "epoch": 182.0,
+ "eval_accuracy": 0.9102094773235713,
+ "eval_auc": 0.9395796926893379,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7179814755669115,
+ "eval_f1_macro": 0.8322930296138966,
+ "eval_loss": 0.2495713084936142,
+ "eval_pr_auc": 0.7229232947500771,
+ "eval_precision": 0.7111673521037646,
+ "eval_precision_macro": 0.8297461525769929,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.8349089438103321,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.403,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3276
+ },
+ {
+ "epoch": 183.0,
+ "eval_accuracy": 0.910362009355298,
+ "eval_auc": 0.9396445500623882,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7187749242303397,
+ "eval_f1_macro": 0.8327294647159501,
+ "eval_loss": 0.2494269460439682,
+ "eval_pr_auc": 0.7232071190390562,
+ "eval_precision": 0.7111742424242424,
+ "eval_precision_macro": 0.8298901515151516,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.726539825862625,
+ "eval_recall_macro": 0.8356547744029127,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 643.015,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3294
+ },
+ {
+ "epoch": 184.0,
+ "eval_accuracy": 0.9106670734187513,
+ "eval_auc": 0.9397990826808293,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7191945021575835,
+ "eval_f1_macro": 0.8330396163256251,
+ "eval_loss": 0.24917152523994446,
+ "eval_pr_auc": 0.7237798760580164,
+ "eval_precision": 0.7129277566539924,
+ "eval_precision_macro": 0.830694740730097,
+ "eval_pred_class_0": 16512,
+ "eval_pred_class_1": 3156,
+ "eval_predicted_binding_ratio": 0.16046369737644905,
+ "eval_recall": 0.7255723960012899,
+ "eval_recall_macro": 0.8354426837856392,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.837,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3312
+ },
+ {
+ "epoch": 185.0,
+ "eval_accuracy": 0.9110738255033557,
+ "eval_auc": 0.9399604475135382,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7199359487590072,
+ "eval_f1_macro": 0.8335408491792982,
+ "eval_loss": 0.24888525903224945,
+ "eval_pr_auc": 0.7244599264333298,
+ "eval_precision": 0.7150127226463104,
+ "eval_precision_macro": 0.8316954196625403,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.8354220119578544,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.342,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3330
+ },
+ {
+ "epoch": 186.0,
+ "eval_accuracy": 0.9109721374822046,
+ "eval_auc": 0.9401063571379032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7194359878224643,
+ "eval_f1_macro": 0.8332638467590944,
+ "eval_loss": 0.2486649453639984,
+ "eval_pr_auc": 0.7249907062273525,
+ "eval_precision": 0.714968152866242,
+ "eval_precision_macro": 0.8315886262879129,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7239600128990649,
+ "eval_recall_macro": 0.8349684775064528,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.8,
+ "eval_steps_per_second": 3.944,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3348
+ },
+ {
+ "epoch": 187.0,
+ "eval_accuracy": 0.9112263575350824,
+ "eval_auc": 0.9402670406956853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7203715566944267,
+ "eval_f1_macro": 0.8338047799185901,
+ "eval_loss": 0.24845102429389954,
+ "eval_pr_auc": 0.7255585294747666,
+ "eval_precision": 0.7155583837098314,
+ "eval_precision_macro": 0.832000069313312,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7252499193808449,
+ "eval_recall_macro": 0.8356436112266088,
+ "eval_runtime": 0.2397,
+ "eval_samples_per_second": 680.081,
+ "eval_steps_per_second": 4.172,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3366
+ },
+ {
+ "epoch": 188.0,
+ "eval_accuracy": 0.9114805775879601,
+ "eval_auc": 0.9404174175370716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7215736446505677,
+ "eval_f1_macro": 0.8344742146415792,
+ "eval_loss": 0.24825866520404816,
+ "eval_pr_auc": 0.7262464197023197,
+ "eval_precision": 0.7157360406091371,
+ "eval_precision_macro": 0.8322867657635175,
+ "eval_pred_class_0": 16516,
+ "eval_pred_class_1": 3152,
+ "eval_predicted_binding_ratio": 0.16026032133414683,
+ "eval_recall": 0.72750725572396,
+ "eval_recall_macro": 0.8367119184396343,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.496,
+ "eval_steps_per_second": 3.856,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3384
+ },
+ {
+ "epoch": 189.0,
+ "eval_accuracy": 0.9114297335773845,
+ "eval_auc": 0.9404972726910148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7223461906279885,
+ "eval_f1_macro": 0.8348286515416876,
+ "eval_loss": 0.24819281697273254,
+ "eval_pr_auc": 0.7264794032375063,
+ "eval_precision": 0.7141506460762685,
+ "eval_precision_macro": 0.8317646228259488,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8379923162699333,
+ "eval_runtime": 0.2478,
+ "eval_samples_per_second": 657.696,
+ "eval_steps_per_second": 4.035,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3402
+ },
+ {
+ "epoch": 190.0,
+ "eval_accuracy": 0.9116839536302623,
+ "eval_auc": 0.940656301723974,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.722922316158877,
+ "eval_f1_macro": 0.8351963018783921,
+ "eval_loss": 0.24791164696216583,
+ "eval_pr_auc": 0.7271914277283732,
+ "eval_precision": 0.7152777777777778,
+ "eval_precision_macro": 0.8323358585858586,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8381432186662634,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.211,
+ "eval_steps_per_second": 3.866,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3420
+ },
+ {
+ "epoch": 191.0,
+ "eval_accuracy": 0.9118873296725646,
+ "eval_auc": 0.9407912914845091,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7233838786911413,
+ "eval_f1_macro": 0.8354907358742514,
+ "eval_loss": 0.24768619239330292,
+ "eval_pr_auc": 0.727892471269696,
+ "eval_precision": 0.7161820480404552,
+ "eval_precision_macro": 0.8327941262984632,
+ "eval_pred_class_0": 16504,
+ "eval_pred_class_1": 3164,
+ "eval_predicted_binding_ratio": 0.1608704494610535,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8382639405833274,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.424,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3438
+ },
+ {
+ "epoch": 192.0,
+ "eval_accuracy": 0.9119381736831401,
+ "eval_auc": 0.9409245683252279,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7240280433397068,
+ "eval_f1_macro": 0.8358192243316804,
+ "eval_loss": 0.2475385069847107,
+ "eval_pr_auc": 0.7283869598485145,
+ "eval_precision": 0.7155905511811024,
+ "eval_precision_macro": 0.832663401462133,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7326668816510803,
+ "eval_recall_macro": 0.8390804680483324,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.682,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3456
+ },
+ {
+ "epoch": 193.0,
+ "eval_accuracy": 0.9120398617042912,
+ "eval_auc": 0.9410766288889338,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7232885476647473,
+ "eval_f1_macro": 0.8354987049773379,
+ "eval_loss": 0.24721089005470276,
+ "eval_pr_auc": 0.7290279509427341,
+ "eval_precision": 0.7175499841320215,
+ "eval_precision_macro": 0.8333466455139735,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.837699192866343,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.255,
+ "eval_steps_per_second": 3.854,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3474
+ },
+ {
+ "epoch": 194.0,
+ "eval_accuracy": 0.9122432377465934,
+ "eval_auc": 0.9411616325348253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7239283429302623,
+ "eval_f1_macro": 0.8358790547924193,
+ "eval_loss": 0.2470986247062683,
+ "eval_pr_auc": 0.7293209780794321,
+ "eval_precision": 0.7181847032688036,
+ "eval_precision_macro": 0.8337245487646312,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7297645920670751,
+ "eval_recall_macro": 0.83808203044532,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.922,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3492
+ },
+ {
+ "epoch": 194.44444444444446,
+ "grad_norm": 15854.8017578125,
+ "learning_rate": 7.667662546617938e-07,
+ "loss": 0.2185,
+ "step": 3500
+ },
+ {
+ "epoch": 195.0,
+ "eval_accuracy": 0.9121923937360179,
+ "eval_auc": 0.9412192294636534,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7248685677871595,
+ "eval_f1_macro": 0.8363143165624445,
+ "eval_loss": 0.2470363825559616,
+ "eval_pr_auc": 0.7294473908430833,
+ "eval_precision": 0.7163098236775819,
+ "eval_precision_macro": 0.8331124670170592,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7336343115124153,
+ "eval_recall_macro": 0.839624543937532,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.796,
+ "eval_steps_per_second": 3.95,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3510
+ },
+ {
+ "epoch": 196.0,
+ "eval_accuracy": 0.9123449257677445,
+ "eval_auc": 0.9413068608842632,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7252151737328658,
+ "eval_f1_macro": 0.8365353589310388,
+ "eval_loss": 0.2468923032283783,
+ "eval_pr_auc": 0.7297206006779651,
+ "eval_precision": 0.7169870784746297,
+ "eval_precision_macro": 0.8334556489675362,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7336343115124153,
+ "eval_recall_macro": 0.83971508537533,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.86,
+ "eval_steps_per_second": 3.741,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3528
+ },
+ {
+ "epoch": 197.0,
+ "eval_accuracy": 0.9124466137888957,
+ "eval_auc": 0.9415050729580239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7246562200191877,
+ "eval_f1_macro": 0.8363018721763311,
+ "eval_loss": 0.2465437948703766,
+ "eval_pr_auc": 0.7306168212570879,
+ "eval_precision": 0.7186806216301934,
+ "eval_precision_macro": 0.8340602623742853,
+ "eval_pred_class_0": 16515,
+ "eval_pred_class_1": 3153,
+ "eval_predicted_binding_ratio": 0.1603111653447224,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8385959258552536,
+ "eval_runtime": 0.2512,
+ "eval_samples_per_second": 648.769,
+ "eval_steps_per_second": 3.98,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3546
+ },
+ {
+ "epoch": 198.0,
+ "eval_accuracy": 0.9127008338417735,
+ "eval_auc": 0.9416286951597772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7254996003197443,
+ "eval_f1_macro": 0.836798347664482,
+ "eval_loss": 0.24635502696037292,
+ "eval_pr_auc": 0.7311574695224861,
+ "eval_precision": 0.7194039315155358,
+ "eval_precision_macro": 0.8345112185130059,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8391400017444531,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.086,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3564
+ },
+ {
+ "epoch": 199.0,
+ "eval_accuracy": 0.9126499898311979,
+ "eval_auc": 0.941704083096699,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7255591054313099,
+ "eval_f1_macro": 0.8368090605158727,
+ "eval_loss": 0.24623039364814758,
+ "eval_pr_auc": 0.7315041353273113,
+ "eval_precision": 0.7188983855650523,
+ "eval_precision_macro": 0.8343113891602595,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7323444050306352,
+ "eval_recall_macro": 0.8393719369271001,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.096,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3582
+ },
+ {
+ "epoch": 200.0,
+ "eval_accuracy": 0.912751677852349,
+ "eval_auc": 0.9417882886776758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7266645428480408,
+ "eval_f1_macro": 0.8373778882187448,
+ "eval_loss": 0.24614199995994568,
+ "eval_pr_auc": 0.7317905290059212,
+ "eval_precision": 0.7179729304375196,
+ "eval_precision_macro": 0.8341244192542944,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7355691712350855,
+ "eval_recall_macro": 0.8407428761951972,
+ "eval_runtime": 0.2558,
+ "eval_samples_per_second": 637.19,
+ "eval_steps_per_second": 3.909,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3600
+ },
+ {
+ "epoch": 201.0,
+ "eval_accuracy": 0.9129042098840756,
+ "eval_auc": 0.9419125142943645,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7274463007159905,
+ "eval_f1_macro": 0.837808654578745,
+ "eval_loss": 0.2459731251001358,
+ "eval_pr_auc": 0.7322365639924645,
+ "eval_precision": 0.717964824120603,
+ "eval_precision_macro": 0.8342614705412528,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7371815543373106,
+ "eval_recall_macro": 0.8414887067877777,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.607,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3618
+ },
+ {
+ "epoch": 202.0,
+ "eval_accuracy": 0.9131075859263779,
+ "eval_auc": 0.9421228725268236,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7264286857691692,
+ "eval_f1_macro": 0.8373900508237788,
+ "eval_loss": 0.2455640733242035,
+ "eval_pr_auc": 0.7332689412482398,
+ "eval_precision": 0.721233312142403,
+ "eval_precision_macro": 0.8354381062588301,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8393814455785812,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 643.062,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3636
+ },
+ {
+ "epoch": 203.0,
+ "eval_accuracy": 0.9132092739475289,
+ "eval_auc": 0.9422253168108461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7266613290632506,
+ "eval_f1_macro": 0.8375381529725912,
+ "eval_loss": 0.24535632133483887,
+ "eval_pr_auc": 0.7337329229212076,
+ "eval_precision": 0.7216921119592875,
+ "eval_precision_macro": 0.8356705536799585,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8394418065371132,
+ "eval_runtime": 0.2645,
+ "eval_samples_per_second": 616.363,
+ "eval_steps_per_second": 3.781,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3654
+ },
+ {
+ "epoch": 204.0,
+ "eval_accuracy": 0.9133109619686801,
+ "eval_auc": 0.9423872461284946,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7268066015061689,
+ "eval_f1_macro": 0.8376441226295008,
+ "eval_loss": 0.2451435625553131,
+ "eval_pr_auc": 0.7344416461934514,
+ "eval_precision": 0.7222929936305732,
+ "eval_precision_macro": 0.8359468356342605,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.8393711096646888,
+ "eval_runtime": 0.24,
+ "eval_samples_per_second": 679.026,
+ "eval_steps_per_second": 4.166,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3672
+ },
+ {
+ "epoch": 205.0,
+ "eval_accuracy": 0.9138194020744357,
+ "eval_auc": 0.9424586242758461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7293629251157592,
+ "eval_f1_macro": 0.8390563302747484,
+ "eval_loss": 0.24504327774047852,
+ "eval_pr_auc": 0.7346240551024029,
+ "eval_precision": 0.7223276407337128,
+ "eval_precision_macro": 0.8364152440915626,
+ "eval_pred_class_0": 16506,
+ "eval_pred_class_1": 3162,
+ "eval_predicted_binding_ratio": 0.16076876143990237,
+ "eval_recall": 0.7365366010964205,
+ "eval_recall_macro": 0.8417698397526527,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.36,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3690
+ },
+ {
+ "epoch": 206.0,
+ "eval_accuracy": 0.91376855806386,
+ "eval_auc": 0.9426215171108914,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.729073482428115,
+ "eval_f1_macro": 0.8388988164347613,
+ "eval_loss": 0.2448122650384903,
+ "eval_pr_auc": 0.7352376772544322,
+ "eval_precision": 0.7223805001582779,
+ "eval_precision_macro": 0.8363855980711433,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7358916478555305,
+ "eval_recall_macro": 0.8414775436114739,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.41,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3708
+ },
+ {
+ "epoch": 207.0,
+ "eval_accuracy": 0.9139210900955868,
+ "eval_auc": 0.9426929439207377,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7299409794225554,
+ "eval_f1_macro": 0.8393709493840633,
+ "eval_loss": 0.24476298689842224,
+ "eval_pr_auc": 0.7354485111055644,
+ "eval_precision": 0.7222222222222222,
+ "eval_precision_macro": 0.8364747474747475,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7378265075782006,
+ "eval_recall_macro": 0.8423544320350108,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.323,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3726
+ },
+ {
+ "epoch": 208.0,
+ "eval_accuracy": 0.9138702460850112,
+ "eval_auc": 0.9428956913390121,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.72896,
+ "eval_f1_macro": 0.8388800483588226,
+ "eval_loss": 0.24439764022827148,
+ "eval_pr_auc": 0.736435270049487,
+ "eval_precision": 0.723404255319149,
+ "eval_precision_macro": 0.8367914187788916,
+ "eval_pred_class_0": 16519,
+ "eval_pred_class_1": 3149,
+ "eval_predicted_binding_ratio": 0.16010778930242017,
+ "eval_recall": 0.7346017413737504,
+ "eval_recall_macro": 0.8410136732461799,
+ "eval_runtime": 0.2835,
+ "eval_samples_per_second": 574.921,
+ "eval_steps_per_second": 3.527,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3744
+ },
+ {
+ "epoch": 209.0,
+ "eval_accuracy": 0.9140736221273134,
+ "eval_auc": 0.9429205676063466,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.730805989168525,
+ "eval_f1_macro": 0.83984185960937,
+ "eval_loss": 0.24438706040382385,
+ "eval_pr_auc": 0.7364306271247294,
+ "eval_precision": 0.722064841045011,
+ "eval_precision_macro": 0.8365645289452815,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7397613673008707,
+ "eval_recall_macro": 0.8432313204585479,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.222,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3762
+ },
+ {
+ "epoch": 210.0,
+ "eval_accuracy": 0.9142769981696156,
+ "eval_auc": 0.9430619808161933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7309288222151292,
+ "eval_f1_macro": 0.8399730291904192,
+ "eval_loss": 0.2441486269235611,
+ "eval_pr_auc": 0.7370811402900591,
+ "eval_precision": 0.7235387045813586,
+ "eval_precision_macro": 0.8371980622222068,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7384714608190907,
+ "eval_recall_macro": 0.842827811051786,
+ "eval_runtime": 0.2675,
+ "eval_samples_per_second": 609.291,
+ "eval_steps_per_second": 3.738,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3780
+ },
+ {
+ "epoch": 211.0,
+ "eval_accuracy": 0.9146329062436445,
+ "eval_auc": 0.9431815542983806,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7325155328978812,
+ "eval_f1_macro": 0.8408637738901821,
+ "eval_loss": 0.24397221207618713,
+ "eval_pr_auc": 0.7374915342644908,
+ "eval_precision": 0.7238664987405542,
+ "eval_precision_macro": 0.8376184300639468,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7413737504030957,
+ "eval_recall_macro": 0.8442185948852564,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.425,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3798
+ },
+ {
+ "epoch": 212.0,
+ "eval_accuracy": 0.9144295302013423,
+ "eval_auc": 0.9432977798009264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7319636884854276,
+ "eval_f1_macro": 0.8405258137499286,
+ "eval_loss": 0.24377743899822235,
+ "eval_pr_auc": 0.7380125440447487,
+ "eval_precision": 0.7230962869729389,
+ "eval_precision_macro": 0.837200053735105,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7410512737826508,
+ "eval_recall_macro": 0.8439668151372359,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.044,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3816
+ },
+ {
+ "epoch": 213.0,
+ "eval_accuracy": 0.9144295302013423,
+ "eval_auc": 0.9434619086633391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7311072056239016,
+ "eval_f1_macro": 0.8401129643018078,
+ "eval_loss": 0.24348998069763184,
+ "eval_pr_auc": 0.7388402605739814,
+ "eval_precision": 0.7245091830272324,
+ "eval_precision_macro": 0.8376331499630408,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7378265075782006,
+ "eval_recall_macro": 0.8426562368276709,
+ "eval_runtime": 0.2496,
+ "eval_samples_per_second": 652.954,
+ "eval_steps_per_second": 4.006,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3834
+ },
+ {
+ "epoch": 214.0,
+ "eval_accuracy": 0.9147854382753712,
+ "eval_auc": 0.943601764673353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7310654685494223,
+ "eval_f1_macro": 0.8402185728440683,
+ "eval_loss": 0.24328412115573883,
+ "eval_pr_auc": 0.7395557790782865,
+ "eval_precision": 0.727563078888534,
+ "eval_precision_macro": 0.838897945080114,
+ "eval_pred_class_0": 16537,
+ "eval_pred_class_1": 3131,
+ "eval_predicted_binding_ratio": 0.15919259711206019,
+ "eval_recall": 0.7346017413737504,
+ "eval_recall_macro": 0.841556921872968,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.38,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3852
+ },
+ {
+ "epoch": 215.0,
+ "eval_accuracy": 0.9146837502542201,
+ "eval_auc": 0.9436924715636332,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7310897435897435,
+ "eval_f1_macro": 0.8401943762667112,
+ "eval_loss": 0.2431441992521286,
+ "eval_pr_auc": 0.7398761361047077,
+ "eval_precision": 0.7266645428480408,
+ "eval_precision_macro": 0.838527383046018,
+ "eval_pred_class_0": 16529,
+ "eval_pred_class_1": 3139,
+ "eval_predicted_binding_ratio": 0.15959934919666463,
+ "eval_recall": 0.7355691712350855,
+ "eval_recall_macro": 0.8418897344073055,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 631.075,
+ "eval_steps_per_second": 3.872,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3870
+ },
+ {
+ "epoch": 216.0,
+ "eval_accuracy": 0.9147345942647956,
+ "eval_auc": 0.9437266715649686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7332591060919358,
+ "eval_f1_macro": 0.8412581348487456,
+ "eval_loss": 0.24318096041679382,
+ "eval_pr_auc": 0.739819563727558,
+ "eval_precision": 0.7234777150031387,
+ "eval_precision_macro": 0.8375913025931845,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7433086101257659,
+ "eval_recall_macro": 0.8450653028295274,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.716,
+ "eval_steps_per_second": 3.71,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3888
+ },
+ {
+ "epoch": 217.0,
+ "eval_accuracy": 0.915039658328249,
+ "eval_auc": 0.9438657295100676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7336202773792444,
+ "eval_f1_macro": 0.8415401994826537,
+ "eval_loss": 0.24295340478420258,
+ "eval_pr_auc": 0.740473021691125,
+ "eval_precision": 0.7254098360655737,
+ "eval_precision_macro": 0.8384566154139702,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7420187036439858,
+ "eval_recall_macro": 0.8447221543812975,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.926,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3906
+ },
+ {
+ "epoch": 218.0,
+ "eval_accuracy": 0.9148362822859467,
+ "eval_auc": 0.9439684365715622,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7333227193122114,
+ "eval_f1_macro": 0.8413247993777817,
+ "eval_loss": 0.24282881617546082,
+ "eval_pr_auc": 0.7409189185674594,
+ "eval_precision": 0.7242138364779874,
+ "eval_precision_macro": 0.837907500480624,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7426636568848759,
+ "eval_recall_macro": 0.8448635481261465,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.434,
+ "eval_steps_per_second": 3.923,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3924
+ },
+ {
+ "epoch": 219.0,
+ "eval_accuracy": 0.9149888143176734,
+ "eval_auc": 0.9440560582596731,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7337579617834394,
+ "eval_f1_macro": 0.8415885646284089,
+ "eval_loss": 0.24268390238285065,
+ "eval_pr_auc": 0.7412204047828347,
+ "eval_precision": 0.724756212645486,
+ "eval_precision_macro": 0.8382104794199593,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.7429861335053208,
+ "eval_recall_macro": 0.845085147394901,
+ "eval_runtime": 0.2558,
+ "eval_samples_per_second": 637.151,
+ "eval_steps_per_second": 3.909,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3942
+ },
+ {
+ "epoch": 220.0,
+ "eval_accuracy": 0.9152430343705511,
+ "eval_auc": 0.9441587945186644,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7346808849275823,
+ "eval_f1_macro": 0.842123366857946,
+ "eval_loss": 0.24253520369529724,
+ "eval_pr_auc": 0.7415422610927452,
+ "eval_precision": 0.7253299811439347,
+ "eval_precision_macro": 0.8386142808788943,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7442760399871009,
+ "eval_recall_macro": 0.8457602811150571,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.818,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3960
+ },
+ {
+ "epoch": 221.0,
+ "eval_accuracy": 0.9153955664022778,
+ "eval_auc": 0.9442562168332251,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7354531001589825,
+ "eval_f1_macro": 0.8425495241156832,
+ "eval_loss": 0.2423904687166214,
+ "eval_pr_auc": 0.741902799087436,
+ "eval_precision": 0.7253057384760113,
+ "eval_precision_macro": 0.8387436514456639,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.745888423089326,
+ "eval_recall_macro": 0.8465061117076376,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.926,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3978
+ },
+ {
+ "epoch": 222.0,
+ "eval_accuracy": 0.9153955664022778,
+ "eval_auc": 0.944399226172901,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.734609250398724,
+ "eval_f1_macro": 0.8421428275824746,
+ "eval_loss": 0.24210123717784882,
+ "eval_pr_auc": 0.7426309034006747,
+ "eval_precision": 0.7267276743452193,
+ "eval_precision_macro": 0.8391805533372256,
+ "eval_pred_class_0": 16499,
+ "eval_pred_class_1": 3169,
+ "eval_predicted_binding_ratio": 0.16112466951393126,
+ "eval_recall": 0.7426636568848759,
+ "eval_recall_macro": 0.8451955333980726,
+ "eval_runtime": 0.2674,
+ "eval_samples_per_second": 609.607,
+ "eval_steps_per_second": 3.74,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3996
+ },
+ {
+ "epoch": 222.22222222222223,
+ "grad_norm": 16301.5107421875,
+ "learning_rate": 6.802697587657594e-07,
+ "loss": 0.211,
+ "step": 4000
+ },
+ {
+ "epoch": 223.0,
+ "eval_accuracy": 0.9156497864551556,
+ "eval_auc": 0.9445356174132858,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.735026353617633,
+ "eval_f1_macro": 0.8424338117294514,
+ "eval_loss": 0.2418563961982727,
+ "eval_pr_auc": 0.7432491183346004,
+ "eval_precision": 0.7281645569620253,
+ "eval_precision_macro": 0.8398516024451512,
+ "eval_pred_class_0": 16508,
+ "eval_pred_class_1": 3160,
+ "eval_predicted_binding_ratio": 0.16066707341875128,
+ "eval_recall": 0.7420187036439858,
+ "eval_recall_macro": 0.8450843201324897,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.886,
+ "eval_steps_per_second": 3.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4014
+ },
+ {
+ "epoch": 224.0,
+ "eval_accuracy": 0.9159040065080334,
+ "eval_auc": 0.9445885524751136,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.736624203821656,
+ "eval_f1_macro": 0.8432939508943711,
+ "eval_loss": 0.24180874228477478,
+ "eval_pr_auc": 0.7434863322076849,
+ "eval_precision": 0.7275872916011324,
+ "eval_precision_macro": 0.8398989281099847,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.745888423089326,
+ "eval_recall_macro": 0.8468079165002977,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.671,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4032
+ },
+ {
+ "epoch": 225.0,
+ "eval_accuracy": 0.9159548505186089,
+ "eval_auc": 0.9446536531606378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7374106433677522,
+ "eval_f1_macro": 0.8436909456056703,
+ "eval_loss": 0.24177636206150055,
+ "eval_pr_auc": 0.7437995583771988,
+ "eval_precision": 0.7266750156543519,
+ "eval_precision_macro": 0.8396638402297497,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7484682360528861,
+ "eval_recall_macro": 0.8478865596272157,
+ "eval_runtime": 0.2167,
+ "eval_samples_per_second": 752.113,
+ "eval_steps_per_second": 4.614,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4050
+ },
+ {
+ "epoch": 226.0,
+ "eval_accuracy": 0.9161582265609112,
+ "eval_auc": 0.9447719419529625,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.737629276054097,
+ "eval_f1_macro": 0.843868342907385,
+ "eval_loss": 0.24154822528362274,
+ "eval_pr_auc": 0.7443388353128296,
+ "eval_precision": 0.7280150753768844,
+ "eval_precision_macro": 0.8402572343640063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7475008061915511,
+ "eval_recall_macro": 0.8476141080514102,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.202,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4068
+ },
+ {
+ "epoch": 227.0,
+ "eval_accuracy": 0.9162599145820622,
+ "eval_auc": 0.9448749507219245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7381974248927039,
+ "eval_f1_macro": 0.8441781495775367,
+ "eval_loss": 0.2414349913597107,
+ "eval_pr_auc": 0.7447448668341484,
+ "eval_precision": 0.7278996865203762,
+ "eval_precision_macro": 0.840312265884293,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7487907126733312,
+ "eval_recall_macro": 0.8481987003337683,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.465,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4086
+ },
+ {
+ "epoch": 228.0,
+ "eval_accuracy": 0.9163107585926378,
+ "eval_auc": 0.9449481196490842,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.738314785373609,
+ "eval_f1_macro": 0.8442527143596241,
+ "eval_loss": 0.2413274347782135,
+ "eval_pr_auc": 0.7451543649133733,
+ "eval_precision": 0.7281279397930386,
+ "eval_precision_macro": 0.840427826926679,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7487907126733312,
+ "eval_recall_macro": 0.8482288808130343,
+ "eval_runtime": 0.2185,
+ "eval_samples_per_second": 746.149,
+ "eval_steps_per_second": 4.578,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4104
+ },
+ {
+ "epoch": 229.0,
+ "eval_accuracy": 0.9162599145820622,
+ "eval_auc": 0.9450982726429946,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7376135096383623,
+ "eval_f1_macro": 0.843896745442007,
+ "eval_loss": 0.2410273402929306,
+ "eval_pr_auc": 0.7459954701052622,
+ "eval_precision": 0.728904282115869,
+ "eval_precision_macro": 0.8406224054285385,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7465333763302161,
+ "eval_recall_macro": 0.8472812955170728,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.64,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4122
+ },
+ {
+ "epoch": 230.0,
+ "eval_accuracy": 0.9167683546878178,
+ "eval_auc": 0.9451833444163787,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7392066273697626,
+ "eval_f1_macro": 0.8448445490519523,
+ "eval_loss": 0.2408701479434967,
+ "eval_pr_auc": 0.74641099889946,
+ "eval_precision": 0.7304785894206549,
+ "eval_precision_macro": 0.8415611477299734,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7481457594324411,
+ "eval_recall_macro": 0.8482383894645154,
+ "eval_runtime": 0.2678,
+ "eval_samples_per_second": 608.615,
+ "eval_steps_per_second": 3.734,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4140
+ },
+ {
+ "epoch": 231.0,
+ "eval_accuracy": 0.9167175106772423,
+ "eval_auc": 0.9452793360535927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7393380012730745,
+ "eval_f1_macro": 0.8448898647294817,
+ "eval_loss": 0.24077175557613373,
+ "eval_pr_auc": 0.7467905435777061,
+ "eval_precision": 0.7298146402764687,
+ "eval_precision_macro": 0.8413101105537636,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7491131892937762,
+ "eval_recall_macro": 0.8486013824781189,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.884,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4158
+ },
+ {
+ "epoch": 232.0,
+ "eval_accuracy": 0.9163616026032133,
+ "eval_auc": 0.9453335850027798,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7391786903440621,
+ "eval_f1_macro": 0.8446869866386211,
+ "eval_loss": 0.24075280129909515,
+ "eval_pr_auc": 0.7469096614042753,
+ "eval_precision": 0.7270742358078602,
+ "eval_precision_macro": 0.840149923152381,
+ "eval_pred_class_0": 16462,
+ "eval_pred_class_1": 3206,
+ "eval_predicted_binding_ratio": 0.16300589790522677,
+ "eval_recall": 0.7516930022573364,
+ "eval_recall_macro": 0.8494385817709088,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.647,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4176
+ },
+ {
+ "epoch": 233.0,
+ "eval_accuracy": 0.9168191986983933,
+ "eval_auc": 0.9454244865430391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7405645417063115,
+ "eval_f1_macro": 0.845516906033295,
+ "eval_loss": 0.24061860144138336,
+ "eval_pr_auc": 0.7472467341999135,
+ "eval_precision": 0.7285491419656787,
+ "eval_precision_macro": 0.8410102813636934,
+ "eval_pred_class_0": 16463,
+ "eval_pred_class_1": 3205,
+ "eval_predicted_binding_ratio": 0.1629550538946512,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.8502344374081289,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.77,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4194
+ },
+ {
+ "epoch": 234.0,
+ "eval_accuracy": 0.9171242627618467,
+ "eval_auc": 0.9455514956544295,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7409408773045136,
+ "eval_f1_macro": 0.8458063544009555,
+ "eval_loss": 0.24034352600574493,
+ "eval_pr_auc": 0.7478642707879094,
+ "eval_precision": 0.7304920087746788,
+ "eval_precision_macro": 0.841880100399963,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7516930022573364,
+ "eval_recall_macro": 0.849891288959899,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.626,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4212
+ },
+ {
+ "epoch": 235.0,
+ "eval_accuracy": 0.9170734187512711,
+ "eval_auc": 0.945608099868364,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7413984461709212,
+ "eval_f1_macro": 0.8460087995182923,
+ "eval_loss": 0.24029456079006195,
+ "eval_pr_auc": 0.7479864003180418,
+ "eval_precision": 0.7292576419213974,
+ "eval_precision_macro": 0.8414542370705274,
+ "eval_pred_class_0": 16462,
+ "eval_pred_class_1": 3206,
+ "eval_predicted_binding_ratio": 0.16300589790522677,
+ "eval_recall": 0.7539503386004515,
+ "eval_recall_macro": 0.8507785132973285,
+ "eval_runtime": 0.2166,
+ "eval_samples_per_second": 752.582,
+ "eval_steps_per_second": 4.617,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4230
+ },
+ {
+ "epoch": 236.0,
+ "eval_accuracy": 0.9172767947935733,
+ "eval_auc": 0.9457173958316524,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7416229950770208,
+ "eval_f1_macro": 0.8461890816058248,
+ "eval_loss": 0.2400863915681839,
+ "eval_pr_auc": 0.7485735786505677,
+ "eval_precision": 0.7306007509386734,
+ "eval_precision_macro": 0.8420487970332027,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.850506061721523,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.11,
+ "eval_steps_per_second": 3.768,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4248
+ },
+ {
+ "epoch": 237.0,
+ "eval_accuracy": 0.9173784828147244,
+ "eval_auc": 0.9457852508143816,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7421044278685923,
+ "eval_f1_macro": 0.8464570875531852,
+ "eval_loss": 0.24001120030879974,
+ "eval_pr_auc": 0.7487297504117033,
+ "eval_precision": 0.730625,
+ "eval_precision_macro": 0.8421463596065095,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7539503386004515,
+ "eval_recall_macro": 0.8509595961729245,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.497,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4266
+ },
+ {
+ "epoch": 238.0,
+ "eval_accuracy": 0.9176835468781778,
+ "eval_auc": 0.9458918216779619,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7428934413212641,
+ "eval_f1_macro": 0.8469453737675663,
+ "eval_loss": 0.23980914056301117,
+ "eval_pr_auc": 0.7491659673680734,
+ "eval_precision": 0.7318523153942428,
+ "eval_precision_macro": 0.8427959974251447,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7542728152208965,
+ "eval_recall_macro": 0.8512717368794771,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.674,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4284
+ },
+ {
+ "epoch": 239.0,
+ "eval_accuracy": 0.9175818588570266,
+ "eval_auc": 0.9459814190633611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7427392477384542,
+ "eval_f1_macro": 0.8468350393376697,
+ "eval_loss": 0.2397017627954483,
+ "eval_pr_auc": 0.7495363660441035,
+ "eval_precision": 0.73125,
+ "eval_precision_macro": 0.8425195834345397,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8513424337519016,
+ "eval_runtime": 0.2693,
+ "eval_samples_per_second": 605.187,
+ "eval_steps_per_second": 3.713,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4302
+ },
+ {
+ "epoch": 240.0,
+ "eval_accuracy": 0.9177852348993288,
+ "eval_auc": 0.9460787245879343,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7425569176882661,
+ "eval_f1_macro": 0.846819224235148,
+ "eval_loss": 0.2394852489233017,
+ "eval_pr_auc": 0.7500368484248636,
+ "eval_precision": 0.7333333333333333,
+ "eval_precision_macro": 0.8433466763706938,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7520154788777813,
+ "eval_recall_macro": 0.8504146930213136,
+ "eval_runtime": 0.2663,
+ "eval_samples_per_second": 612.01,
+ "eval_steps_per_second": 3.755,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4320
+ },
+ {
+ "epoch": 241.0,
+ "eval_accuracy": 0.91788692292048,
+ "eval_auc": 0.9461055278900622,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7437728066000318,
+ "eval_f1_macro": 0.8474411515820368,
+ "eval_loss": 0.23946216702461243,
+ "eval_pr_auc": 0.7500126507936957,
+ "eval_precision": 0.7320424734540912,
+ "eval_precision_macro": 0.8430344761294506,
+ "eval_pred_class_0": 16466,
+ "eval_pred_class_1": 3202,
+ "eval_predicted_binding_ratio": 0.16280252186292454,
+ "eval_recall": 0.7558851983231216,
+ "eval_recall_macro": 0.8520477479513235,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.523,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4338
+ },
+ {
+ "epoch": 242.0,
+ "eval_accuracy": 0.9178360789099044,
+ "eval_auc": 0.9462651603379567,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7429207763283487,
+ "eval_f1_macro": 0.8470125818101653,
+ "eval_loss": 0.23918889462947845,
+ "eval_pr_auc": 0.7507506323790389,
+ "eval_precision": 0.7331240188383046,
+ "eval_precision_macro": 0.8433259480225619,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.8508380469934491,
+ "eval_runtime": 0.2685,
+ "eval_samples_per_second": 607.073,
+ "eval_steps_per_second": 3.724,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4356
+ },
+ {
+ "epoch": 243.0,
+ "eval_accuracy": 0.9181411429733577,
+ "eval_auc": 0.9463650352422546,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7440381558028617,
+ "eval_f1_macro": 0.847659094847506,
+ "eval_loss": 0.2390899360179901,
+ "eval_pr_auc": 0.7511682669955798,
+ "eval_precision": 0.7337723424270931,
+ "eval_precision_macro": 0.8437961778887089,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8516744190238277,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.141,
+ "eval_steps_per_second": 3.749,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4374
+ },
+ {
+ "epoch": 244.0,
+ "eval_accuracy": 0.918446207036811,
+ "eval_auc": 0.9465344196541042,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7436061381074168,
+ "eval_f1_macro": 0.8475588127054617,
+ "eval_loss": 0.2388136237859726,
+ "eval_pr_auc": 0.7520628478642977,
+ "eval_precision": 0.7372424722662441,
+ "eval_precision_macro": 0.8451548762954184,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.7500806191551113,
+ "eval_recall_macro": 0.8500206922660327,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.538,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4392
+ },
+ {
+ "epoch": 245.0,
+ "eval_accuracy": 0.9182936750050844,
+ "eval_auc": 0.946561008841255,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7443931923015746,
+ "eval_f1_macro": 0.8478842115097998,
+ "eval_loss": 0.2388090342283249,
+ "eval_pr_auc": 0.7520269916576209,
+ "eval_precision": 0.7344632768361582,
+ "eval_precision_macro": 0.844145847858681,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8517649604616258,
+ "eval_runtime": 0.2356,
+ "eval_samples_per_second": 691.824,
+ "eval_steps_per_second": 4.244,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4410
+ },
+ {
+ "epoch": 246.0,
+ "eval_accuracy": 0.9185478950579622,
+ "eval_auc": 0.9466103526109676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7456335344553826,
+ "eval_f1_macro": 0.8485719582198821,
+ "eval_loss": 0.23877908289432526,
+ "eval_pr_auc": 0.7522268586665612,
+ "eval_precision": 0.7344385361276197,
+ "eval_precision_macro": 0.844360910951309,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8529643255056077,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.409,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4428
+ },
+ {
+ "epoch": 247.0,
+ "eval_accuracy": 0.9181919869839333,
+ "eval_auc": 0.9466863828928207,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.74464370734804,
+ "eval_f1_macro": 0.847968894691123,
+ "eval_loss": 0.23864901065826416,
+ "eval_pr_auc": 0.7525479720158522,
+ "eval_precision": 0.733125,
+ "eval_precision_macro": 0.84363925491863,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7565301515640116,
+ "eval_recall_macro": 0.8524909464888326,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.776,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4446
+ },
+ {
+ "epoch": 248.0,
+ "eval_accuracy": 0.9184970510473867,
+ "eval_auc": 0.9467725739035849,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7454343338097507,
+ "eval_f1_macro": 0.8484579580910492,
+ "eval_loss": 0.23847386240959167,
+ "eval_pr_auc": 0.752960490937812,
+ "eval_precision": 0.7343554443053817,
+ "eval_precision_macro": 0.8442903982090288,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7568526281844566,
+ "eval_recall_macro": 0.8528030871953852,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.673,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4464
+ },
+ {
+ "epoch": 249.0,
+ "eval_accuracy": 0.9183953630262355,
+ "eval_auc": 0.9468334214870647,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7452785272179019,
+ "eval_f1_macro": 0.8483468464756075,
+ "eval_loss": 0.238382488489151,
+ "eval_pr_auc": 0.7531576423642267,
+ "eval_precision": 0.73375,
+ "eval_precision_macro": 0.8440124787466602,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8528737840678097,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.912,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4482
+ },
+ {
+ "epoch": 250.0,
+ "grad_norm": 32703.09375,
+ "learning_rate": 5.870150616070439e-07,
+ "loss": 0.2045,
+ "step": 4500
+ },
+ {
+ "epoch": 250.0,
+ "eval_accuracy": 0.9189546471425666,
+ "eval_auc": 0.9469757203543168,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7464206172446707,
+ "eval_f1_macro": 0.849095331315225,
+ "eval_loss": 0.23810486495494843,
+ "eval_pr_auc": 0.7538503066163141,
+ "eval_precision": 0.7365777080062794,
+ "eval_precision_macro": 0.8453864697284325,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7565301515640116,
+ "eval_recall_macro": 0.8529436536778228,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.492,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4500
+ },
+ {
+ "epoch": 251.0,
+ "eval_accuracy": 0.9192088671954444,
+ "eval_auc": 0.9470911964544428,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7465305471367044,
+ "eval_f1_macro": 0.8492382980338313,
+ "eval_loss": 0.23794293403625488,
+ "eval_pr_auc": 0.7543678751573928,
+ "eval_precision": 0.7386363636363636,
+ "eval_precision_macro": 0.8462575757575758,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8523082090884139,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.943,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4518
+ },
+ {
+ "epoch": 252.0,
+ "eval_accuracy": 0.9186495830791133,
+ "eval_auc": 0.9471146615094285,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7459510955859003,
+ "eval_f1_macro": 0.8487610069611806,
+ "eval_loss": 0.2380078136920929,
+ "eval_pr_auc": 0.7543445385135205,
+ "eval_precision": 0.7347513293712856,
+ "eval_precision_macro": 0.8445476639570896,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7574975814253466,
+ "eval_recall_macro": 0.8531557442950962,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.051,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4536
+ },
+ {
+ "epoch": 253.0,
+ "eval_accuracy": 0.9187004270896888,
+ "eval_auc": 0.9472153636761378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7461501825686617,
+ "eval_f1_macro": 0.8488749520465066,
+ "eval_loss": 0.23785638809204102,
+ "eval_pr_auc": 0.7549126832149435,
+ "eval_precision": 0.7348342714196373,
+ "eval_precision_macro": 0.8446181071730852,
+ "eval_pred_class_0": 16470,
+ "eval_pred_class_1": 3198,
+ "eval_predicted_binding_ratio": 0.16259914582062232,
+ "eval_recall": 0.7578200580457917,
+ "eval_recall_macro": 0.8533169826053187,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.46,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4554
+ },
+ {
+ "epoch": 254.0,
+ "eval_accuracy": 0.918903803131991,
+ "eval_auc": 0.9473291852514412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7465437788018433,
+ "eval_f1_macro": 0.849136671654349,
+ "eval_loss": 0.23767386376857758,
+ "eval_pr_auc": 0.7554825230801616,
+ "eval_precision": 0.7359022556390977,
+ "eval_precision_macro": 0.8451300547435596,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7574975814253466,
+ "eval_recall_macro": 0.8533066466914263,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.53,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4572
+ },
+ {
+ "epoch": 255.0,
+ "eval_accuracy": 0.9196156192800488,
+ "eval_auc": 0.9474168847995438,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7481280866656046,
+ "eval_f1_macro": 0.8501522492676461,
+ "eval_loss": 0.23749451339244843,
+ "eval_pr_auc": 0.7559063738482461,
+ "eval_precision": 0.739294710327456,
+ "eval_precision_macro": 0.8468181046180088,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8535981155701939,
+ "eval_runtime": 0.267,
+ "eval_samples_per_second": 610.581,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4590
+ },
+ {
+ "epoch": 256.0,
+ "eval_accuracy": 0.9195647752694732,
+ "eval_auc": 0.9474761751831905,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7480089200382287,
+ "eval_f1_macro": 0.8500768176935048,
+ "eval_loss": 0.23738548159599304,
+ "eval_pr_auc": 0.7561572732622138,
+ "eval_precision": 0.7390620081838212,
+ "eval_precision_macro": 0.8467003692001515,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.853567935090928,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.111,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4608
+ },
+ {
+ "epoch": 257.0,
+ "eval_accuracy": 0.919818995322351,
+ "eval_auc": 0.9475989409250355,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7486051331101546,
+ "eval_f1_macro": 0.8504541559450298,
+ "eval_loss": 0.23718814551830292,
+ "eval_pr_auc": 0.7567216824275462,
+ "eval_precision": 0.7402269861286255,
+ "eval_precision_macro": 0.8472897782243516,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.853718837487258,
+ "eval_runtime": 0.2144,
+ "eval_samples_per_second": 760.368,
+ "eval_steps_per_second": 4.665,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4626
+ },
+ {
+ "epoch": 258.0,
+ "eval_accuracy": 0.9193613992271711,
+ "eval_auc": 0.9475927315907009,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.748013981569749,
+ "eval_f1_macro": 0.8500072329009691,
+ "eval_loss": 0.23723167181015015,
+ "eval_pr_auc": 0.7565508642499409,
+ "eval_precision": 0.7372377074851237,
+ "eval_precision_macro": 0.845948140540741,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.8542335601596028,
+ "eval_runtime": 0.2406,
+ "eval_samples_per_second": 677.35,
+ "eval_steps_per_second": 4.156,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4644
+ },
+ {
+ "epoch": 259.0,
+ "eval_accuracy": 0.9197681513117755,
+ "eval_auc": 0.9476849762158163,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7486460656259956,
+ "eval_f1_macro": 0.8504558902151395,
+ "eval_loss": 0.23700466752052307,
+ "eval_pr_auc": 0.7570817989267699,
+ "eval_precision": 0.7396915328926661,
+ "eval_precision_macro": 0.8470757706910725,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7578200580457917,
+ "eval_recall_macro": 0.8539507726699049,
+ "eval_runtime": 0.2559,
+ "eval_samples_per_second": 636.906,
+ "eval_steps_per_second": 3.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4662
+ },
+ {
+ "epoch": 260.0,
+ "eval_accuracy": 0.9199715273540777,
+ "eval_auc": 0.9477206555569931,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495225970719287,
+ "eval_f1_macro": 0.8509503339952407,
+ "eval_loss": 0.23693729937076569,
+ "eval_pr_auc": 0.7572519889982183,
+ "eval_precision": 0.7398680490103676,
+ "eval_precision_macro": 0.8473073942352414,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7594324411480168,
+ "eval_recall_macro": 0.8547267837417515,
+ "eval_runtime": 0.2431,
+ "eval_samples_per_second": 670.398,
+ "eval_steps_per_second": 4.113,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4680
+ },
+ {
+ "epoch": 261.0,
+ "eval_accuracy": 0.919818995322351,
+ "eval_auc": 0.9477750407611654,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7492447129909365,
+ "eval_f1_macro": 0.8507623994645729,
+ "eval_loss": 0.23685960471630096,
+ "eval_pr_auc": 0.7574807332819814,
+ "eval_precision": 0.739021329987453,
+ "eval_precision_macro": 0.8469075096539207,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.85476730013491,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.06,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4698
+ },
+ {
+ "epoch": 262.0,
+ "eval_accuracy": 0.9200223713646533,
+ "eval_auc": 0.947870010485989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495621716287215,
+ "eval_f1_macro": 0.8509874086097018,
+ "eval_loss": 0.23665639758110046,
+ "eval_pr_auc": 0.7579557575566394,
+ "eval_precision": 0.740251572327044,
+ "eval_precision_macro": 0.8474729477355745,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.854625906390061,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.828,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4716
+ },
+ {
+ "epoch": 263.0,
+ "eval_accuracy": 0.9201749033963799,
+ "eval_auc": 0.9479619047411422,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7501591343093571,
+ "eval_f1_macro": 0.8513291133243506,
+ "eval_loss": 0.23653987050056458,
+ "eval_pr_auc": 0.7584013256784117,
+ "eval_precision": 0.7404963870562362,
+ "eval_precision_macro": 0.8476822244653337,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7600773943889068,
+ "eval_recall_macro": 0.8551096213207285,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 641.941,
+ "eval_steps_per_second": 3.938,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4734
+ },
+ {
+ "epoch": 264.0,
+ "eval_accuracy": 0.9203274354281066,
+ "eval_auc": 0.9480515799865329,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7501195981502152,
+ "eval_f1_macro": 0.8513640482812168,
+ "eval_loss": 0.2363332211971283,
+ "eval_pr_auc": 0.7588834286830018,
+ "eval_precision": 0.7419558359621451,
+ "eval_precision_macro": 0.848278196802748,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7584650112866818,
+ "eval_recall_macro": 0.8545448736037441,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.351,
+ "eval_steps_per_second": 3.953,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4752
+ },
+ {
+ "epoch": 265.0,
+ "eval_accuracy": 0.9203782794386821,
+ "eval_auc": 0.9481172938194913,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.750557502389296,
+ "eval_f1_macro": 0.8515931077800434,
+ "eval_loss": 0.23625436425209045,
+ "eval_pr_auc": 0.7591087523185005,
+ "eval_precision": 0.7415801070192005,
+ "eval_precision_macro": 0.8482019751638359,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.8550992854068361,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.903,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4770
+ },
+ {
+ "epoch": 266.0,
+ "eval_accuracy": 0.9200223713646533,
+ "eval_auc": 0.9482093632596256,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495621716287215,
+ "eval_f1_macro": 0.8509874086097018,
+ "eval_loss": 0.23614051938056946,
+ "eval_pr_auc": 0.7594934170637511,
+ "eval_precision": 0.740251572327044,
+ "eval_precision_macro": 0.8474729477355745,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.854625906390061,
+ "eval_runtime": 0.2555,
+ "eval_samples_per_second": 638.024,
+ "eval_steps_per_second": 3.914,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4788
+ },
+ {
+ "epoch": 267.0,
+ "eval_accuracy": 0.9204291234492576,
+ "eval_auc": 0.9483264257570818,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7504385265507894,
+ "eval_f1_macro": 0.8515537559413557,
+ "eval_loss": 0.23595084249973297,
+ "eval_pr_auc": 0.7600448623712702,
+ "eval_precision": 0.7422712933753943,
+ "eval_precision_macro": 0.8484662322132155,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7587874879071267,
+ "eval_recall_macro": 0.8547362923932326,
+ "eval_runtime": 0.2589,
+ "eval_samples_per_second": 629.531,
+ "eval_steps_per_second": 3.862,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4806
+ },
+ {
+ "epoch": 268.0,
+ "eval_accuracy": 0.9203782794386821,
+ "eval_auc": 0.9483670102777331,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7509541984732825,
+ "eval_f1_macro": 0.8517842887791249,
+ "eval_loss": 0.23594258725643158,
+ "eval_pr_auc": 0.7601171664174119,
+ "eval_precision": 0.740822089739567,
+ "eval_precision_macro": 0.8479609508220922,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7613673008706868,
+ "eval_recall_macro": 0.8557545745616185,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.883,
+ "eval_steps_per_second": 4.061,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4824
+ },
+ {
+ "epoch": 269.0,
+ "eval_accuracy": 0.920276591417531,
+ "eval_auc": 0.9484603741402287,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7505567928730512,
+ "eval_f1_macro": 0.8515567625484772,
+ "eval_loss": 0.23575998842716217,
+ "eval_pr_auc": 0.7605631058573062,
+ "eval_precision": 0.7406593406593407,
+ "eval_precision_macro": 0.8478216317444613,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7607223476297968,
+ "eval_recall_macro": 0.8554320979411736,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.928,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4842
+ },
+ {
+ "epoch": 270.0,
+ "eval_accuracy": 0.9206833435021354,
+ "eval_auc": 0.9485513243429828,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.751434034416826,
+ "eval_f1_macro": 0.8521235507837306,
+ "eval_loss": 0.235606849193573,
+ "eval_pr_auc": 0.7610077759721279,
+ "eval_precision": 0.7426771653543307,
+ "eval_precision_macro": 0.8488138752255192,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7603998710093518,
+ "eval_recall_macro": 0.8555424839443451,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.303,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4860
+ },
+ {
+ "epoch": 271.0,
+ "eval_accuracy": 0.9209884075655888,
+ "eval_auc": 0.948647082400222,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7519948930737312,
+ "eval_f1_macro": 0.8525018311755109,
+ "eval_loss": 0.23542079329490662,
+ "eval_pr_auc": 0.7614859215167992,
+ "eval_precision": 0.744391785150079,
+ "eval_precision_macro": 0.8496242389363071,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.8554614511580283,
+ "eval_runtime": 0.2456,
+ "eval_samples_per_second": 663.61,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4878
+ },
+ {
+ "epoch": 272.0,
+ "eval_accuracy": 0.920734187512711,
+ "eval_auc": 0.9487102755159504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7517120560598821,
+ "eval_f1_macro": 0.8522755458325244,
+ "eval_loss": 0.2353215366601944,
+ "eval_pr_auc": 0.761752604262035,
+ "eval_precision": 0.7426054122089364,
+ "eval_precision_macro": 0.8488345435817272,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8558347800855242,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.279,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4896
+ },
+ {
+ "epoch": 273.0,
+ "eval_accuracy": 0.920734187512711,
+ "eval_auc": 0.9487825490532058,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7517911160643209,
+ "eval_f1_macro": 0.8523136490925144,
+ "eval_loss": 0.23522616922855377,
+ "eval_pr_auc": 0.7620046741511783,
+ "eval_precision": 0.7424528301886792,
+ "eval_precision_macro": 0.8487858522607636,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7613673008706868,
+ "eval_recall_macro": 0.8559658379164806,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.657,
+ "eval_steps_per_second": 3.875,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4914
+ },
+ {
+ "epoch": 274.0,
+ "eval_accuracy": 0.9208358755338621,
+ "eval_auc": 0.9488776355680169,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7518725099601593,
+ "eval_f1_macro": 0.8523888728682258,
+ "eval_loss": 0.2350645512342453,
+ "eval_pr_auc": 0.7623918274747735,
+ "eval_precision": 0.7432262129804663,
+ "eval_precision_macro": 0.8491200787225601,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7607223476297968,
+ "eval_recall_macro": 0.8557640832130997,
+ "eval_runtime": 0.2427,
+ "eval_samples_per_second": 671.591,
+ "eval_steps_per_second": 4.12,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4932
+ },
+ {
+ "epoch": 275.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9489531597599242,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7525510204081632,
+ "eval_f1_macro": 0.8528058755561261,
+ "eval_loss": 0.2349635362625122,
+ "eval_pr_auc": 0.7627186958629152,
+ "eval_precision": 0.7442447177546515,
+ "eval_precision_macro": 0.8496637300357182,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8560460434403863,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.549,
+ "eval_steps_per_second": 3.838,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4950
+ },
+ {
+ "epoch": 276.0,
+ "eval_accuracy": 0.9207850315232866,
+ "eval_auc": 0.9489723619803666,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7524626628535113,
+ "eval_f1_macro": 0.8526552767085183,
+ "eval_loss": 0.2350020557641983,
+ "eval_pr_auc": 0.7627735453579831,
+ "eval_precision": 0.7416222987785781,
+ "eval_precision_macro": 0.8485653223786669,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8569134232124421,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.627,
+ "eval_steps_per_second": 3.93,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4968
+ },
+ {
+ "epoch": 277.0,
+ "eval_accuracy": 0.9209884075655888,
+ "eval_auc": 0.9490740568619694,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7524689391525964,
+ "eval_f1_macro": 0.8527303253449472,
+ "eval_loss": 0.2348015159368515,
+ "eval_pr_auc": 0.7632371897507115,
+ "eval_precision": 0.7434686811457349,
+ "eval_precision_macro": 0.8493281796365992,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7616897774911319,
+ "eval_recall_macro": 0.8562477981437672,
+ "eval_runtime": 0.2476,
+ "eval_samples_per_second": 658.287,
+ "eval_steps_per_second": 4.039,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4986
+ },
+ {
+ "epoch": 277.77777777777777,
+ "grad_norm": 14799.8212890625,
+ "learning_rate": 4.904982238472025e-07,
+ "loss": 0.199,
+ "step": 5000
+ },
+ {
+ "epoch": 278.0,
+ "eval_accuracy": 0.9207850315232866,
+ "eval_auc": 0.9491519363186243,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7514358647096363,
+ "eval_f1_macro": 0.8521604145127957,
+ "eval_loss": 0.23470228910446167,
+ "eval_pr_auc": 0.7636185597213633,
+ "eval_precision": 0.7436059362172402,
+ "eval_precision_macro": 0.8491982774838095,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.7594324411480168,
+ "eval_recall_macro": 0.8552096714100077,
+ "eval_runtime": 0.2561,
+ "eval_samples_per_second": 636.482,
+ "eval_steps_per_second": 3.905,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5004
+ },
+ {
+ "epoch": 279.0,
+ "eval_accuracy": 0.9209375635550132,
+ "eval_auc": 0.9492209202712323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7521912350597609,
+ "eval_f1_macro": 0.8525784825369885,
+ "eval_loss": 0.2345963418483734,
+ "eval_pr_auc": 0.7638862490185815,
+ "eval_precision": 0.7435412728418399,
+ "eval_precision_macro": 0.8493079227068421,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8559555020025882,
+ "eval_runtime": 0.273,
+ "eval_samples_per_second": 597.168,
+ "eval_steps_per_second": 3.664,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5022
+ },
+ {
+ "epoch": 280.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9492453877736104,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7532591414944356,
+ "eval_f1_macro": 0.8531471523002045,
+ "eval_loss": 0.23453067243099213,
+ "eval_pr_auc": 0.7639137465976396,
+ "eval_precision": 0.7428661022264033,
+ "eval_precision_macro": 0.8492229655497572,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7639471138342471,
+ "eval_recall_macro": 0.8572255639189947,
+ "eval_runtime": 0.2247,
+ "eval_samples_per_second": 725.266,
+ "eval_steps_per_second": 4.449,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5040
+ },
+ {
+ "epoch": 281.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9493002596027307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7531806615776081,
+ "eval_f1_macro": 0.8531093334515976,
+ "eval_loss": 0.2344331294298172,
+ "eval_pr_auc": 0.7641447446055818,
+ "eval_precision": 0.7430185127078758,
+ "eval_precision_macro": 0.8492715280607518,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8570945060880382,
+ "eval_runtime": 0.263,
+ "eval_samples_per_second": 619.693,
+ "eval_steps_per_second": 3.802,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5058
+ },
+ {
+ "epoch": 282.0,
+ "eval_accuracy": 0.9212426276184665,
+ "eval_auc": 0.9493485327975579,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7538534880025425,
+ "eval_f1_macro": 0.8534875889608693,
+ "eval_loss": 0.2344052791595459,
+ "eval_pr_auc": 0.764296468123137,
+ "eval_precision": 0.7431077694235589,
+ "eval_precision_macro": 0.849430796583593,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8577092788496623,
+ "eval_runtime": 0.2484,
+ "eval_samples_per_second": 656.097,
+ "eval_steps_per_second": 4.025,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5076
+ },
+ {
+ "epoch": 283.0,
+ "eval_accuracy": 0.9212426276184665,
+ "eval_auc": 0.9494190544850012,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7536969311496263,
+ "eval_f1_macro": 0.8534121476034391,
+ "eval_loss": 0.23425185680389404,
+ "eval_pr_auc": 0.7646652388104445,
+ "eval_precision": 0.7434127979924717,
+ "eval_precision_macro": 0.8495280009379834,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8574471631877492,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.881,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5094
+ },
+ {
+ "epoch": 284.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9495173137944721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7539000318369946,
+ "eval_f1_macro": 0.853564041452472,
+ "eval_loss": 0.2340681403875351,
+ "eval_pr_auc": 0.7651886756989377,
+ "eval_precision": 0.744419993712669,
+ "eval_precision_macro": 0.8499803613859639,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8572755889636343,
+ "eval_runtime": 0.2477,
+ "eval_samples_per_second": 658.168,
+ "eval_steps_per_second": 4.038,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5112
+ },
+ {
+ "epoch": 285.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9495497619459952,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7540566337893733,
+ "eval_f1_macro": 0.8536395120535369,
+ "eval_loss": 0.234034925699234,
+ "eval_pr_auc": 0.7653794962608654,
+ "eval_precision": 0.7441130298273155,
+ "eval_precision_macro": 0.8498821534503319,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8575377046255472,
+ "eval_runtime": 0.2024,
+ "eval_samples_per_second": 805.487,
+ "eval_steps_per_second": 4.942,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5130
+ },
+ {
+ "epoch": 286.0,
+ "eval_accuracy": 0.9215476916819199,
+ "eval_auc": 0.9496228140831674,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.754181934044926,
+ "eval_f1_macro": 0.8537539029854382,
+ "eval_loss": 0.23388919234275818,
+ "eval_pr_auc": 0.7657504791267543,
+ "eval_precision": 0.7452770780856424,
+ "eval_precision_macro": 0.8503853253634615,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.763302160593357,
+ "eval_recall_macro": 0.8572350725704758,
+ "eval_runtime": 0.1796,
+ "eval_samples_per_second": 907.381,
+ "eval_steps_per_second": 5.567,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5148
+ },
+ {
+ "epoch": 287.0,
+ "eval_accuracy": 0.9219544437665244,
+ "eval_auc": 0.9497348546111616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7550662198819211,
+ "eval_f1_macro": 0.8543240621923138,
+ "eval_loss": 0.2337103933095932,
+ "eval_pr_auc": 0.7663000808208629,
+ "eval_precision": 0.7473152242577384,
+ "eval_precision_macro": 0.8513875842534602,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7629796839729119,
+ "eval_recall_macro": 0.8573454585736473,
+ "eval_runtime": 0.2659,
+ "eval_samples_per_second": 613.107,
+ "eval_steps_per_second": 3.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5166
+ },
+ {
+ "epoch": 288.0,
+ "eval_accuracy": 0.9218527557453732,
+ "eval_auc": 0.9498016974139991,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7548253309937789,
+ "eval_f1_macro": 0.8541719723587154,
+ "eval_loss": 0.23359474539756775,
+ "eval_pr_auc": 0.7666273574525592,
+ "eval_precision": 0.7468434343434344,
+ "eval_precision_macro": 0.8511489898989899,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7629796839729119,
+ "eval_recall_macro": 0.8572850976151154,
+ "eval_runtime": 0.2686,
+ "eval_samples_per_second": 606.819,
+ "eval_steps_per_second": 3.723,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5184
+ },
+ {
+ "epoch": 289.0,
+ "eval_accuracy": 0.9216493797030709,
+ "eval_auc": 0.9498063690134986,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7546569017672345,
+ "eval_f1_macro": 0.8540188154275592,
+ "eval_loss": 0.23359988629817963,
+ "eval_pr_auc": 0.7666725529217715,
+ "eval_precision": 0.7452830188679245,
+ "eval_precision_macro": 0.8504738723645784,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8576886070218773,
+ "eval_runtime": 0.2712,
+ "eval_samples_per_second": 600.959,
+ "eval_steps_per_second": 3.687,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5202
+ },
+ {
+ "epoch": 290.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9498248412965191,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7543692405465523,
+ "eval_f1_macro": 0.8537901526260393,
+ "eval_loss": 0.23362942039966583,
+ "eval_pr_auc": 0.7666566917414745,
+ "eval_precision": 0.7435014093329158,
+ "eval_precision_macro": 0.8496869717377781,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7655594969364721,
+ "eval_recall_macro": 0.8580619359493733,
+ "eval_runtime": 0.2158,
+ "eval_samples_per_second": 755.492,
+ "eval_steps_per_second": 4.635,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5220
+ },
+ {
+ "epoch": 291.0,
+ "eval_accuracy": 0.9219544437665244,
+ "eval_auc": 0.9499087646350263,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7554564282300462,
+ "eval_f1_macro": 0.8545121458733945,
+ "eval_loss": 0.23345860838890076,
+ "eval_pr_auc": 0.767032300253484,
+ "eval_precision": 0.746536523929471,
+ "eval_precision_macro": 0.8511363192046093,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8580007477284299,
+ "eval_runtime": 0.2582,
+ "eval_samples_per_second": 631.391,
+ "eval_steps_per_second": 3.874,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5238
+ },
+ {
+ "epoch": 292.0,
+ "eval_accuracy": 0.9226154159040065,
+ "eval_auc": 0.9500412726083274,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7567135549872123,
+ "eval_f1_macro": 0.8553519407342349,
+ "eval_loss": 0.23320625722408295,
+ "eval_pr_auc": 0.7678050059622479,
+ "eval_precision": 0.7502377179080824,
+ "eval_precision_macro": 0.8528939452496871,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.763302160593357,
+ "eval_recall_macro": 0.857868862635062,
+ "eval_runtime": 0.2022,
+ "eval_samples_per_second": 806.124,
+ "eval_steps_per_second": 4.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5256
+ },
+ {
+ "epoch": 293.0,
+ "eval_accuracy": 0.9219035997559487,
+ "eval_auc": 0.9500337688516315,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7551801083838062,
+ "eval_f1_macro": 0.8543609694420392,
+ "eval_loss": 0.23323103785514832,
+ "eval_pr_auc": 0.7676369094541509,
+ "eval_precision": 0.7466120390797353,
+ "eval_precision_macro": 0.8511174775574487,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7639471138342471,
+ "eval_recall_macro": 0.8577084515872508,
+ "eval_runtime": 0.2457,
+ "eval_samples_per_second": 663.373,
+ "eval_steps_per_second": 4.07,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5274
+ },
+ {
+ "epoch": 294.0,
+ "eval_accuracy": 0.9219035997559487,
+ "eval_auc": 0.9500722803500048,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7554140127388536,
+ "eval_f1_macro": 0.8544737053045671,
+ "eval_loss": 0.23318050801753998,
+ "eval_pr_auc": 0.7677669872107012,
+ "eval_precision": 0.7461465869770368,
+ "eval_precision_macro": 0.8509676473001504,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8581016250801203,
+ "eval_runtime": 0.2622,
+ "eval_samples_per_second": 621.765,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5292
+ },
+ {
+ "epoch": 295.0,
+ "eval_accuracy": 0.9223103518405532,
+ "eval_auc": 0.9501381693679445,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7562998405103668,
+ "eval_f1_macro": 0.8550446157127531,
+ "eval_loss": 0.23305083811283112,
+ "eval_pr_auc": 0.7681080957655504,
+ "eval_precision": 0.7481855474913222,
+ "eval_precision_macro": 0.8519702208636682,
+ "eval_pred_class_0": 16499,
+ "eval_pred_class_1": 3169,
+ "eval_predicted_binding_ratio": 0.16112466951393126,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8582120110832919,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.885,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5310
+ },
+ {
+ "epoch": 296.0,
+ "eval_accuracy": 0.9220052877770999,
+ "eval_auc": 0.9501577511558462,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7556546670914304,
+ "eval_f1_macro": 0.8546256879531203,
+ "eval_loss": 0.23304298520088196,
+ "eval_pr_auc": 0.7681639014007426,
+ "eval_precision": 0.7466163046899591,
+ "eval_precision_macro": 0.8512051870912047,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8581619860386525,
+ "eval_runtime": 0.2233,
+ "eval_samples_per_second": 730.092,
+ "eval_steps_per_second": 4.479,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5328
+ },
+ {
+ "epoch": 297.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9502301512155882,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7564981661616967,
+ "eval_f1_macro": 0.8551582014839937,
+ "eval_loss": 0.23290005326271057,
+ "eval_pr_auc": 0.7685493782671796,
+ "eval_precision": 0.7482649842271294,
+ "eval_precision_macro": 0.8520389050120978,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8583732493935144,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.312,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5346
+ },
+ {
+ "epoch": 298.0,
+ "eval_accuracy": 0.9224628838722798,
+ "eval_auc": 0.9503260649928105,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.756739511883873,
+ "eval_f1_macro": 0.8553105125875349,
+ "eval_loss": 0.23275841772556305,
+ "eval_pr_auc": 0.768979970846171,
+ "eval_precision": 0.7487373737373737,
+ "eval_precision_macro": 0.8522777777777777,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8584336103520465,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.056,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5364
+ },
+ {
+ "epoch": 299.0,
+ "eval_accuracy": 0.9224120398617043,
+ "eval_auc": 0.9503799246420391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7565411614550096,
+ "eval_f1_macro": 0.8551969143430849,
+ "eval_loss": 0.23264609277248383,
+ "eval_pr_auc": 0.7692428666566218,
+ "eval_precision": 0.748658035996211,
+ "eval_precision_macro": 0.8522091464751675,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8582723720418239,
+ "eval_runtime": 0.2384,
+ "eval_samples_per_second": 683.765,
+ "eval_steps_per_second": 4.195,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5382
+ },
+ {
+ "epoch": 300.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9503983969250598,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7565758010521282,
+ "eval_f1_macro": 0.8551956221484214,
+ "eval_loss": 0.23263320326805115,
+ "eval_pr_auc": 0.7693119480202146,
+ "eval_precision": 0.748108448928121,
+ "eval_precision_macro": 0.8519882690809373,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7652370203160271,
+ "eval_recall_macro": 0.8585043072244709,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.523,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5400
+ },
+ {
+ "epoch": 301.0,
+ "eval_accuracy": 0.9225137278828553,
+ "eval_auc": 0.9504205091626904,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7572475310608474,
+ "eval_f1_macro": 0.855573369257207,
+ "eval_loss": 0.2325783669948578,
+ "eval_pr_auc": 0.7694562109808358,
+ "eval_precision": 0.7481901164620711,
+ "eval_precision_macro": 0.8521436908185075,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.859119079986095,
+ "eval_runtime": 0.2413,
+ "eval_samples_per_second": 675.443,
+ "eval_steps_per_second": 4.144,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5418
+ },
+ {
+ "epoch": 302.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9504706120673215,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.756808408982322,
+ "eval_f1_macro": 0.8553077347570655,
+ "eval_loss": 0.23251411318778992,
+ "eval_pr_auc": 0.7697272239104135,
+ "eval_precision": 0.7476400251730648,
+ "eval_precision_macro": 0.8518369925744038,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7662044501773622,
+ "eval_recall_macro": 0.8588974807173404,
+ "eval_runtime": 0.2708,
+ "eval_samples_per_second": 602.022,
+ "eval_steps_per_second": 3.693,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5436
+ },
+ {
+ "epoch": 303.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9504959749596038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7571178622554477,
+ "eval_f1_macro": 0.8554568705510044,
+ "eval_loss": 0.23248492181301117,
+ "eval_pr_auc": 0.7698215821647963,
+ "eval_precision": 0.7470182046453233,
+ "eval_precision_macro": 0.8516367567335341,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8594217120411665,
+ "eval_runtime": 0.2271,
+ "eval_samples_per_second": 717.802,
+ "eval_steps_per_second": 4.404,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5454
+ },
+ {
+ "epoch": 304.0,
+ "eval_accuracy": 0.9228187919463087,
+ "eval_auc": 0.9505587106478812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7579719387755102,
+ "eval_f1_macro": 0.8560304891070873,
+ "eval_loss": 0.23235370218753815,
+ "eval_pr_auc": 0.7701546218803492,
+ "eval_precision": 0.749605802585935,
+ "eval_precision_macro": 0.8528595176474563,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.8593001628616911,
+ "eval_runtime": 0.1849,
+ "eval_samples_per_second": 881.772,
+ "eval_steps_per_second": 5.41,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5472
+ },
+ {
+ "epoch": 305.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9506673837312365,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7583120204603581,
+ "eval_f1_macro": 0.8563023222011585,
+ "eval_loss": 0.23212042450904846,
+ "eval_pr_auc": 0.7706813722507476,
+ "eval_precision": 0.7518225039619651,
+ "eval_precision_macro": 0.8538377341465491,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8588259565825047,
+ "eval_runtime": 0.1758,
+ "eval_samples_per_second": 927.397,
+ "eval_steps_per_second": 5.69,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5490
+ },
+ {
+ "epoch": 305.55555555555554,
+ "grad_norm": 15827.6396484375,
+ "learning_rate": 3.943376017723057e-07,
+ "loss": 0.1954,
+ "step": 5500
+ },
+ {
+ "epoch": 306.0,
+ "eval_accuracy": 0.9233272320520642,
+ "eval_auc": 0.9506862453142154,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7591823698498882,
+ "eval_f1_macro": 0.8567938214370079,
+ "eval_loss": 0.23211389780044556,
+ "eval_pr_auc": 0.7707434518060764,
+ "eval_precision": 0.7519772223979754,
+ "eval_precision_macro": 0.8540585209342515,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.8596019676543512,
+ "eval_runtime": 0.2349,
+ "eval_samples_per_second": 693.884,
+ "eval_steps_per_second": 4.257,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5508
+ },
+ {
+ "epoch": 307.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9506972041080411,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7588075880758808,
+ "eval_f1_macro": 0.8565232326853711,
+ "eval_loss": 0.23213696479797363,
+ "eval_pr_auc": 0.7706724583082463,
+ "eval_precision": 0.7503152585119798,
+ "eval_precision_macro": 0.8533038465207814,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8598442387508907,
+ "eval_runtime": 0.2681,
+ "eval_samples_per_second": 608.072,
+ "eval_steps_per_second": 3.731,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5526
+ },
+ {
+ "epoch": 308.0,
+ "eval_accuracy": 0.9229204799674599,
+ "eval_auc": 0.9507381389986547,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.758521822236381,
+ "eval_f1_macro": 0.8563315143004762,
+ "eval_loss": 0.23207640647888184,
+ "eval_pr_auc": 0.7708824410889222,
+ "eval_precision": 0.7494491658797607,
+ "eval_precision_macro": 0.8528944938003498,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8598847551440492,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.193,
+ "eval_steps_per_second": 3.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5544
+ },
+ {
+ "epoch": 309.0,
+ "eval_accuracy": 0.9233272320520642,
+ "eval_auc": 0.9508327778185136,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7593360995850622,
+ "eval_f1_macro": 0.8568679288369823,
+ "eval_loss": 0.2318853884935379,
+ "eval_pr_auc": 0.7713979747932131,
+ "eval_precision": 0.7516587677725118,
+ "eval_precision_macro": 0.8539545732457663,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7671718800386972,
+ "eval_recall_macro": 0.8598640833162641,
+ "eval_runtime": 0.3164,
+ "eval_samples_per_second": 515.146,
+ "eval_steps_per_second": 3.16,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5562
+ },
+ {
+ "epoch": 310.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9508632794702453,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7587747287811104,
+ "eval_f1_macro": 0.8565253830188363,
+ "eval_loss": 0.23183651268482208,
+ "eval_pr_auc": 0.7715329415149417,
+ "eval_precision": 0.7508683296495106,
+ "eval_precision_macro": 0.8535264016588866,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.7668494034182521,
+ "eval_recall_macro": 0.8596123035682436,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.635,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5580
+ },
+ {
+ "epoch": 311.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9509030269959862,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7588844621513944,
+ "eval_f1_macro": 0.8565602855810055,
+ "eval_loss": 0.23182560503482819,
+ "eval_pr_auc": 0.7716106695707178,
+ "eval_precision": 0.7501575299306869,
+ "eval_precision_macro": 0.8532526463767658,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8599752965818471,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.932,
+ "eval_steps_per_second": 3.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5598
+ },
+ {
+ "epoch": 312.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9509431638216853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7591589678241478,
+ "eval_f1_macro": 0.8567105868221108,
+ "eval_loss": 0.23177149891853333,
+ "eval_pr_auc": 0.7718084005522707,
+ "eval_precision": 0.7500786905886057,
+ "eval_precision_macro": 0.853269895291271,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8602675927230261,
+ "eval_runtime": 0.2364,
+ "eval_samples_per_second": 689.439,
+ "eval_steps_per_second": 4.23,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5616
+ },
+ {
+ "epoch": 313.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9509867264870173,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7590380633858895,
+ "eval_f1_macro": 0.856634317411552,
+ "eval_loss": 0.23171813786029816,
+ "eval_pr_auc": 0.771954368377823,
+ "eval_precision": 0.749842668344871,
+ "eval_precision_macro": 0.8531505640086999,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8602374122437602,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.85,
+ "eval_steps_per_second": 3.858,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5634
+ },
+ {
+ "epoch": 314.0,
+ "eval_accuracy": 0.9228696359568843,
+ "eval_auc": 0.9509922740114228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7584010192705845,
+ "eval_f1_macro": 0.8562552937959844,
+ "eval_loss": 0.23173367977142334,
+ "eval_pr_auc": 0.7720376545527768,
+ "eval_precision": 0.7492133417243549,
+ "eval_precision_macro": 0.8527752578846153,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8598545746647831,
+ "eval_runtime": 0.2664,
+ "eval_samples_per_second": 611.795,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5652
+ },
+ {
+ "epoch": 315.0,
+ "eval_accuracy": 0.9233780760626398,
+ "eval_auc": 0.9510762654774227,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7595340673368438,
+ "eval_f1_macro": 0.8569813431425517,
+ "eval_loss": 0.23158977925777435,
+ "eval_pr_auc": 0.7725350282702966,
+ "eval_precision": 0.7517372078332281,
+ "eval_precision_macro": 0.8540227670483556,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8600253216264866,
+ "eval_runtime": 0.2525,
+ "eval_samples_per_second": 645.435,
+ "eval_steps_per_second": 3.96,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5670
+ },
+ {
+ "epoch": 316.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9511527434542277,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7601405301820504,
+ "eval_f1_macro": 0.8573636072933594,
+ "eval_loss": 0.23149563372135162,
+ "eval_pr_auc": 0.7728791764414321,
+ "eval_precision": 0.7529262891490035,
+ "eval_precision_macro": 0.8546239248495366,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8601762240228168,
+ "eval_runtime": 0.2078,
+ "eval_samples_per_second": 784.391,
+ "eval_steps_per_second": 4.812,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5688
+ },
+ {
+ "epoch": 317.0,
+ "eval_accuracy": 0.923581452104942,
+ "eval_auc": 0.9511918583675363,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7598657932577089,
+ "eval_f1_macro": 0.8572131820235396,
+ "eval_loss": 0.23143813014030457,
+ "eval_pr_auc": 0.7730966214358813,
+ "eval_precision": 0.7530082330588981,
+ "eval_precision_macro": 0.8546082958147307,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7668494034182521,
+ "eval_recall_macro": 0.8598839278816377,
+ "eval_runtime": 0.2098,
+ "eval_samples_per_second": 777.022,
+ "eval_steps_per_second": 4.767,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5706
+ },
+ {
+ "epoch": 318.0,
+ "eval_accuracy": 0.923479764083791,
+ "eval_auc": 0.9512240340090886,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7597765363128491,
+ "eval_f1_macro": 0.8571341935895835,
+ "eval_loss": 0.23142649233341217,
+ "eval_pr_auc": 0.7731516186784236,
+ "eval_precision": 0.7522123893805309,
+ "eval_precision_macro": 0.8542630051604545,
+ "eval_pred_class_0": 16504,
+ "eval_pred_class_1": 3164,
+ "eval_predicted_binding_ratio": 0.1608704494610535,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8600856825850187,
+ "eval_runtime": 0.2091,
+ "eval_samples_per_second": 779.633,
+ "eval_steps_per_second": 4.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5724
+ },
+ {
+ "epoch": 319.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9512825457928188,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7600638977635783,
+ "eval_f1_macro": 0.8573266640831436,
+ "eval_loss": 0.2313271462917328,
+ "eval_pr_auc": 0.7734563478045352,
+ "eval_precision": 0.7530864197530864,
+ "eval_precision_macro": 0.8546763493762101,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7671718800386972,
+ "eval_recall_macro": 0.8600451661918602,
+ "eval_runtime": 0.2154,
+ "eval_samples_per_second": 756.779,
+ "eval_steps_per_second": 4.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5742
+ },
+ {
+ "epoch": 320.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9513326194999532,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7605048729829046,
+ "eval_f1_macro": 0.8575931868618003,
+ "eval_loss": 0.23122872412204742,
+ "eval_pr_auc": 0.7736938128704516,
+ "eval_precision": 0.7536415452818239,
+ "eval_precision_macro": 0.8549855212781016,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8602667654606148,
+ "eval_runtime": 0.235,
+ "eval_samples_per_second": 693.713,
+ "eval_steps_per_second": 4.256,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5760
+ },
+ {
+ "epoch": 321.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9513477438033324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7602171136653896,
+ "eval_f1_macro": 0.8574005258699469,
+ "eval_loss": 0.2312333732843399,
+ "eval_pr_auc": 0.773709338696213,
+ "eval_precision": 0.7527663610496365,
+ "eval_precision_macro": 0.8545716082739852,
+ "eval_pred_class_0": 16505,
+ "eval_pred_class_1": 3163,
+ "eval_predicted_binding_ratio": 0.16081960545047794,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8603072818537733,
+ "eval_runtime": 0.2276,
+ "eval_samples_per_second": 716.105,
+ "eval_steps_per_second": 4.393,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5778
+ },
+ {
+ "epoch": 322.0,
+ "eval_accuracy": 0.9235306080943665,
+ "eval_auc": 0.9513562889374167,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7602040816326531,
+ "eval_f1_macro": 0.8573582711574831,
+ "eval_loss": 0.23121465742588043,
+ "eval_pr_auc": 0.7737540605936648,
+ "eval_precision": 0.7518133081046988,
+ "eval_precision_macro": 0.854175430193466,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7687842631409223,
+ "eval_recall_macro": 0.8606400943881107,
+ "eval_runtime": 0.2488,
+ "eval_samples_per_second": 655.047,
+ "eval_steps_per_second": 4.019,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5796
+ },
+ {
+ "epoch": 323.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9514191998106756,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7604914632200415,
+ "eval_f1_macro": 0.8575507604890313,
+ "eval_loss": 0.23111233115196228,
+ "eval_pr_auc": 0.7739781495758574,
+ "eval_precision": 0.7526847757422616,
+ "eval_precision_macro": 0.8545874490758332,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8605995779949522,
+ "eval_runtime": 0.2355,
+ "eval_samples_per_second": 692.067,
+ "eval_steps_per_second": 4.246,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5814
+ },
+ {
+ "epoch": 324.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9514517842171841,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7604914632200415,
+ "eval_f1_macro": 0.8575507604890313,
+ "eval_loss": 0.23104801774024963,
+ "eval_pr_auc": 0.7741130008089699,
+ "eval_precision": 0.7526847757422616,
+ "eval_precision_macro": 0.8545874490758332,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8605995779949522,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.295,
+ "eval_steps_per_second": 4.125,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5832
+ },
+ {
+ "epoch": 325.0,
+ "eval_accuracy": 0.9237339841366687,
+ "eval_auc": 0.951504651151519,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7606128311522502,
+ "eval_f1_macro": 0.857627250169412,
+ "eval_loss": 0.23095941543579102,
+ "eval_pr_auc": 0.7744096919390852,
+ "eval_precision": 0.7529225908372827,
+ "eval_precision_macro": 0.8547076748648027,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8606297584742182,
+ "eval_runtime": 0.2175,
+ "eval_samples_per_second": 749.374,
+ "eval_steps_per_second": 4.597,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5850
+ },
+ {
+ "epoch": 326.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.951546208922066,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7606578317100431,
+ "eval_f1_macro": 0.8576669257120046,
+ "eval_loss": 0.23088191449642181,
+ "eval_pr_auc": 0.774641356281408,
+ "eval_precision": 0.7533206831119544,
+ "eval_precision_macro": 0.8548803827531177,
+ "eval_pred_class_0": 16506,
+ "eval_pred_class_1": 3162,
+ "eval_predicted_binding_ratio": 0.16076876143990237,
+ "eval_recall": 0.7681393099000322,
+ "eval_recall_macro": 0.8605288811225278,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.56,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5868
+ },
+ {
+ "epoch": 327.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9515728175742149,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7608105951811074,
+ "eval_f1_macro": 0.8577405662711912,
+ "eval_loss": 0.23083868622779846,
+ "eval_pr_auc": 0.7747608129205691,
+ "eval_precision": 0.7530006317119393,
+ "eval_precision_macro": 0.8547756764183257,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7687842631409223,
+ "eval_recall_macro": 0.8607909967844407,
+ "eval_runtime": 0.2492,
+ "eval_samples_per_second": 654.065,
+ "eval_steps_per_second": 4.013,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5886
+ },
+ {
+ "epoch": 328.0,
+ "eval_accuracy": 0.9238356721578198,
+ "eval_auc": 0.9516116113150578,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7611607142857143,
+ "eval_f1_macro": 0.8579273206076528,
+ "eval_loss": 0.23078228533267975,
+ "eval_pr_auc": 0.7749744562806883,
+ "eval_precision": 0.7527593818984547,
+ "eval_precision_macro": 0.8547393927131844,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7697516930022573,
+ "eval_recall_macro": 0.8612143507565763,
+ "eval_runtime": 0.2121,
+ "eval_samples_per_second": 768.541,
+ "eval_steps_per_second": 4.715,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5904
+ },
+ {
+ "epoch": 329.0,
+ "eval_accuracy": 0.9238865161683953,
+ "eval_auc": 0.9516614027797223,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7614342629482072,
+ "eval_f1_macro": 0.8580771629311073,
+ "eval_loss": 0.2307167798280716,
+ "eval_pr_auc": 0.7752067172424865,
+ "eval_precision": 0.7526780088216761,
+ "eval_precision_macro": 0.8547553982510223,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7703966462431474,
+ "eval_recall_macro": 0.8615066468977552,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.134,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5922
+ },
+ {
+ "epoch": 330.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9516455582714203,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7612841703750794,
+ "eval_f1_macro": 0.857914812460267,
+ "eval_loss": 0.23076769709587097,
+ "eval_pr_auc": 0.775112377066796,
+ "eval_precision": 0.750548417424005,
+ "eval_precision_macro": 0.8538504058352652,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7723315059658175,
+ "eval_recall_macro": 0.8621420914871643,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.508,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5940
+ },
+ {
+ "epoch": 331.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9516963035209824,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7621776504297995,
+ "eval_f1_macro": 0.8584894423868002,
+ "eval_loss": 0.23067235946655273,
+ "eval_pr_auc": 0.7753820734835624,
+ "eval_precision": 0.7525935240490412,
+ "eval_precision_macro": 0.8548556265844769,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.7720090293453724,
+ "eval_recall_macro": 0.8622524774903357,
+ "eval_runtime": 0.1824,
+ "eval_samples_per_second": 893.499,
+ "eval_steps_per_second": 5.482,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5958
+ },
+ {
+ "epoch": 332.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9517492385828104,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7619502868068834,
+ "eval_f1_macro": 0.8583798620967267,
+ "eval_loss": 0.23056790232658386,
+ "eval_pr_auc": 0.775607049366595,
+ "eval_precision": 0.7530708661417322,
+ "eval_precision_macro": 0.8550111500417023,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7710415994840374,
+ "eval_recall_macro": 0.8618593039974662,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.679,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5976
+ },
+ {
+ "epoch": 333.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9517777256072577,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7621475227019276,
+ "eval_f1_macro": 0.8584929210351648,
+ "eval_loss": 0.23053352534770966,
+ "eval_pr_auc": 0.7757600766000483,
+ "eval_precision": 0.7531486146095718,
+ "eval_precision_macro": 0.855079036870636,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7713640761044824,
+ "eval_recall_macro": 0.8620205423076888,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.883,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5994
+ },
+ {
+ "epoch": 333.3333333333333,
+ "grad_norm": 16736.6328125,
+ "learning_rate": 3.021381973636964e-07,
+ "loss": 0.1913,
+ "step": 6000
+ },
+ {
+ "epoch": 334.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9517933365355851,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616473207187152,
+ "eval_f1_macro": 0.8581438407085573,
+ "eval_loss": 0.23052088916301727,
+ "eval_pr_auc": 0.7758187649274527,
+ "eval_precision": 0.751254705144291,
+ "eval_precision_macro": 0.8542074496595242,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7723315059658175,
+ "eval_recall_macro": 0.8622326329249622,
+ "eval_runtime": 0.2573,
+ "eval_samples_per_second": 633.473,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6012
+ },
+ {
+ "epoch": 335.0,
+ "eval_accuracy": 0.9238356721578198,
+ "eval_auc": 0.9518365877609504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7619198982835347,
+ "eval_f1_macro": 0.8582932017746205,
+ "eval_loss": 0.23045583069324493,
+ "eval_pr_auc": 0.7760158372270667,
+ "eval_precision": 0.7511751801942964,
+ "eval_precision_macro": 0.8542244778801185,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8625249290661412,
+ "eval_runtime": 0.219,
+ "eval_samples_per_second": 744.186,
+ "eval_steps_per_second": 4.566,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6030
+ },
+ {
+ "epoch": 336.0,
+ "eval_accuracy": 0.9241915802318487,
+ "eval_auc": 0.9518931822423861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7624661462482077,
+ "eval_f1_macro": 0.8586824817571539,
+ "eval_loss": 0.23035065829753876,
+ "eval_pr_auc": 0.7762652864261658,
+ "eval_precision": 0.753463476070529,
+ "eval_precision_macro": 0.855266785330923,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7716865527249275,
+ "eval_recall_macro": 0.8622119610971773,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.881,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6048
+ },
+ {
+ "epoch": 337.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9519021264089276,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7623289850461342,
+ "eval_f1_macro": 0.8585623745200415,
+ "eval_loss": 0.2303379327058792,
+ "eval_pr_auc": 0.7763353590359,
+ "eval_precision": 0.752276295133438,
+ "eval_precision_macro": 0.8547524774823897,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7726539825862625,
+ "eval_recall_macro": 0.8625145931522488,
+ "eval_runtime": 0.2467,
+ "eval_samples_per_second": 660.828,
+ "eval_steps_per_second": 4.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6066
+ },
+ {
+ "epoch": 338.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9519062432559864,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616325234238527,
+ "eval_f1_macro": 0.8581006831532533,
+ "eval_loss": 0.23036180436611176,
+ "eval_pr_auc": 0.7763420780879606,
+ "eval_precision": 0.7503128911138923,
+ "eval_precision_macro": 0.8538172032062905,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7732989358271525,
+ "eval_recall_macro": 0.8625654454592997,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.319,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6084
+ },
+ {
+ "epoch": 339.0,
+ "eval_accuracy": 0.9237339841366687,
+ "eval_auc": 0.9519363653402588,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7618291521117815,
+ "eval_f1_macro": 0.8582134440261069,
+ "eval_loss": 0.23031854629516602,
+ "eval_pr_auc": 0.776475073481046,
+ "eval_precision": 0.7503909915545824,
+ "eval_precision_macro": 0.8538853142461151,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7736214124475975,
+ "eval_recall_macro": 0.8627266837695222,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.603,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6102
+ },
+ {
+ "epoch": 340.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9519665750170216,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616325234238527,
+ "eval_f1_macro": 0.8581006831532533,
+ "eval_loss": 0.23026354610919952,
+ "eval_pr_auc": 0.7766276763039114,
+ "eval_precision": 0.7503128911138923,
+ "eval_precision_macro": 0.8538172032062905,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7732989358271525,
+ "eval_recall_macro": 0.8625654454592997,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.893,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6120
+ },
+ {
+ "epoch": 341.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9520129601070512,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.230192169547081,
+ "eval_pr_auc": 0.7768138361852162,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2426,
+ "eval_samples_per_second": 671.971,
+ "eval_steps_per_second": 4.123,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6138
+ },
+ {
+ "epoch": 342.0,
+ "eval_accuracy": 0.9239882041895465,
+ "eval_auc": 0.9520488340982072,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7622833518842423,
+ "eval_f1_macro": 0.8585223761569667,
+ "eval_loss": 0.2301386296749115,
+ "eval_pr_auc": 0.7769598633905366,
+ "eval_precision": 0.7518820577164367,
+ "eval_precision_macro": 0.8545818055572474,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626154705039393,
+ "eval_runtime": 0.2473,
+ "eval_samples_per_second": 659.185,
+ "eval_steps_per_second": 4.044,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6156
+ },
+ {
+ "epoch": 343.0,
+ "eval_accuracy": 0.9241407362212731,
+ "eval_auc": 0.952100825107636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7626471524021635,
+ "eval_f1_macro": 0.8587517153841377,
+ "eval_loss": 0.23005619645118713,
+ "eval_pr_auc": 0.7772456933395511,
+ "eval_precision": 0.7525902668759812,
+ "eval_precision_macro": 0.8549397976374689,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8627060119417373,
+ "eval_runtime": 0.2129,
+ "eval_samples_per_second": 765.55,
+ "eval_steps_per_second": 4.697,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6174
+ },
+ {
+ "epoch": 344.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9521250006350455,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.23000310361385345,
+ "eval_pr_auc": 0.7773917675410515,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.276,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6192
+ },
+ {
+ "epoch": 345.0,
+ "eval_accuracy": 0.9241915802318487,
+ "eval_auc": 0.9521700231752211,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7627684964200477,
+ "eval_f1_macro": 0.8588281984687149,
+ "eval_loss": 0.22992061078548431,
+ "eval_pr_auc": 0.777567865132197,
+ "eval_precision": 0.7528266331658291,
+ "eval_precision_macro": 0.8550592763014295,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8627361924210033,
+ "eval_runtime": 0.2684,
+ "eval_samples_per_second": 607.371,
+ "eval_steps_per_second": 3.726,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6210
+ },
+ {
+ "epoch": 346.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9521951914175242,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7624502784407319,
+ "eval_f1_macro": 0.8586388332084449,
+ "eval_loss": 0.22986458241939545,
+ "eval_pr_auc": 0.7777181268262345,
+ "eval_precision": 0.7525125628140703,
+ "eval_precision_macro": 0.8548719086819685,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7726539825862625,
+ "eval_recall_macro": 0.8625447736315148,
+ "eval_runtime": 0.2315,
+ "eval_samples_per_second": 704.221,
+ "eval_steps_per_second": 4.32,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6228
+ },
+ {
+ "epoch": 347.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9522022182817713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.22987791895866394,
+ "eval_pr_auc": 0.7777283636078421,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2021,
+ "eval_samples_per_second": 806.414,
+ "eval_steps_per_second": 4.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6246
+ },
+ {
+ "epoch": 348.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.9522300142987927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7634340222575516,
+ "eval_f1_macro": 0.8592029398342167,
+ "eval_loss": 0.229818195104599,
+ "eval_pr_auc": 0.7778254023800715,
+ "eval_precision": 0.7529005957980558,
+ "eval_precision_macro": 0.8552111450378106,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7742663656884876,
+ "eval_recall_macro": 0.8633509651826273,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 672.887,
+ "eval_steps_per_second": 4.128,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6264
+ },
+ {
+ "epoch": 349.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9522554939810626,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.22978660464286804,
+ "eval_pr_auc": 0.7779049389173774,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.237,
+ "eval_samples_per_second": 687.696,
+ "eval_steps_per_second": 4.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6282
+ },
+ {
+ "epoch": 350.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9522950273918264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638270820089001,
+ "eval_f1_macro": 0.859428369717681,
+ "eval_loss": 0.2297380119562149,
+ "eval_pr_auc": 0.7780796039517833,
+ "eval_precision": 0.7530554685051708,
+ "eval_precision_macro": 0.855346694014678,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7749113189293776,
+ "eval_recall_macro": 0.8636734418030723,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.348,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6300
+ },
+ {
+ "epoch": 351.0,
+ "eval_accuracy": 0.9242932682529998,
+ "eval_auc": 0.9523093341652933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636883034438978,
+ "eval_f1_macro": 0.8593074482256571,
+ "eval_loss": 0.22972844541072845,
+ "eval_pr_auc": 0.7781517759374512,
+ "eval_precision": 0.751875,
+ "eval_precision_macro": 0.8548359697595336,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8639760738581439,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.342,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6318
+ },
+ {
+ "epoch": 352.0,
+ "eval_accuracy": 0.9242424242424242,
+ "eval_auc": 0.952317528929415,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7635671215487146,
+ "eval_f1_macro": 0.8592310391299909,
+ "eval_loss": 0.22972537577152252,
+ "eval_pr_auc": 0.7781868067856106,
+ "eval_precision": 0.7516401124648547,
+ "eval_precision_macro": 0.8547172445484534,
+ "eval_pred_class_0": 16467,
+ "eval_pred_class_1": 3201,
+ "eval_predicted_binding_ratio": 0.16275167785234898,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8639458933788778,
+ "eval_runtime": 0.234,
+ "eval_samples_per_second": 696.7,
+ "eval_steps_per_second": 4.274,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6336
+ },
+ {
+ "epoch": 353.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.9523571596651685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638095238095238,
+ "eval_f1_macro": 0.8593838755989138,
+ "eval_loss": 0.2296588122844696,
+ "eval_pr_auc": 0.7784014772150735,
+ "eval_precision": 0.7521100343857455,
+ "eval_precision_macro": 0.8549547682402951,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8640062543374099,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.953,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6354
+ },
+ {
+ "epoch": 354.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.952381568772553,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638095238095238,
+ "eval_f1_macro": 0.8593838755989138,
+ "eval_loss": 0.22961482405662537,
+ "eval_pr_auc": 0.7785102930543456,
+ "eval_precision": 0.7521100343857455,
+ "eval_precision_macro": 0.8549547682402951,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8640062543374099,
+ "eval_runtime": 0.2671,
+ "eval_samples_per_second": 610.236,
+ "eval_steps_per_second": 3.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6372
+ },
+ {
+ "epoch": 355.0,
+ "eval_accuracy": 0.9242932682529998,
+ "eval_auc": 0.9523906199965831,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763838223632038,
+ "eval_f1_macro": 0.8593796791618457,
+ "eval_loss": 0.2295987904071808,
+ "eval_pr_auc": 0.7785825605072106,
+ "eval_precision": 0.7515605493133583,
+ "eval_precision_macro": 0.8547343562893321,
+ "eval_pred_class_0": 16464,
+ "eval_pred_class_1": 3204,
+ "eval_predicted_binding_ratio": 0.16290420988407567,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.8642381895200568,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.418,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6390
+ },
+ {
+ "epoch": 356.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9524596428791869,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636305833730727,
+ "eval_f1_macro": 0.8593156699585895,
+ "eval_loss": 0.229468435049057,
+ "eval_pr_auc": 0.7789122838326235,
+ "eval_precision": 0.7529780564263323,
+ "eval_precision_macro": 0.8552789299002641,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635122034928499,
+ "eval_runtime": 0.2641,
+ "eval_samples_per_second": 617.191,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6408
+ },
+ {
+ "epoch": 357.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9524861542063461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763751987281399,
+ "eval_f1_macro": 0.8593921831946546,
+ "eval_loss": 0.2294115126132965,
+ "eval_pr_auc": 0.7790142584142796,
+ "eval_precision": 0.7532141737221699,
+ "eval_precision_macro": 0.8553982756468123,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635423839721159,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.426,
+ "eval_steps_per_second": 3.911,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6426
+ },
+ {
+ "epoch": 358.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9524829619466881,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7639771283354511,
+ "eval_f1_macro": 0.8595006707052558,
+ "eval_loss": 0.2294154018163681,
+ "eval_pr_auc": 0.7789936192429844,
+ "eval_precision": 0.7527386541471048,
+ "eval_precision_macro": 0.8552438490185533,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7755562721702677,
+ "eval_recall_macro": 0.8639355574649854,
+ "eval_runtime": 0.2445,
+ "eval_samples_per_second": 666.738,
+ "eval_steps_per_second": 4.09,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6444
+ },
+ {
+ "epoch": 359.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9525149624032593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.2293538749217987,
+ "eval_pr_auc": 0.7791661873069065,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.132,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6462
+ },
+ {
+ "epoch": 360.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9525288798767679,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.22932648658752441,
+ "eval_pr_auc": 0.7792072068588576,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.2714,
+ "eval_samples_per_second": 600.631,
+ "eval_steps_per_second": 3.685,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6480
+ },
+ {
+ "epoch": 361.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9525697758373857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763751987281399,
+ "eval_f1_macro": 0.8593921831946546,
+ "eval_loss": 0.22925521433353424,
+ "eval_pr_auc": 0.77936321808712,
+ "eval_precision": 0.7532141737221699,
+ "eval_precision_macro": 0.8553982756468123,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635423839721159,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.66,
+ "eval_steps_per_second": 4.016,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6498
+ },
+ {
+ "epoch": 361.1111111111111,
+ "grad_norm": 21180.3203125,
+ "learning_rate": 2.1735650901333336e-07,
+ "loss": 0.1893,
+ "step": 6500
+ },
+ {
+ "epoch": 362.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9526172898972942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636016544702513,
+ "eval_f1_macro": 0.8593197379764267,
+ "eval_loss": 0.22917793691158295,
+ "eval_pr_auc": 0.7795955328857882,
+ "eval_precision": 0.7535321821036107,
+ "eval_precision_macro": 0.8555017581027062,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8632802683102028,
+ "eval_runtime": 0.208,
+ "eval_samples_per_second": 783.548,
+ "eval_steps_per_second": 4.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6516
+ },
+ {
+ "epoch": 363.0,
+ "eval_accuracy": 0.924496644295302,
+ "eval_auc": 0.9526382926300437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636479388826993,
+ "eval_f1_macro": 0.8593600478608577,
+ "eval_loss": 0.22912409901618958,
+ "eval_pr_auc": 0.7796869947527124,
+ "eval_precision": 0.7539283469516027,
+ "eval_precision_macro": 0.8556733812884909,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7736214124475975,
+ "eval_recall_macro": 0.8631793909585124,
+ "eval_runtime": 0.2657,
+ "eval_samples_per_second": 613.513,
+ "eval_steps_per_second": 3.764,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6534
+ },
+ {
+ "epoch": 364.0,
+ "eval_accuracy": 0.924496644295302,
+ "eval_auc": 0.952662049659998,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7637231503579952,
+ "eval_f1_macro": 0.859396294249525,
+ "eval_loss": 0.22910362482070923,
+ "eval_pr_auc": 0.779788701256993,
+ "eval_precision": 0.7537688442211056,
+ "eval_precision_macro": 0.8556213791598126,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8633104487894689,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.175,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6552
+ },
+ {
+ "epoch": 365.0,
+ "eval_accuracy": 0.9245474883058775,
+ "eval_auc": 0.9526903420344663,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638446849140674,
+ "eval_f1_macro": 0.8594728689002142,
+ "eval_loss": 0.2290574461221695,
+ "eval_pr_auc": 0.7799354479263911,
+ "eval_precision": 0.7540056550424128,
+ "eval_precision_macro": 0.8557410744123195,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8633406292687349,
+ "eval_runtime": 0.2049,
+ "eval_samples_per_second": 795.329,
+ "eval_steps_per_second": 4.879,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6570
+ },
+ {
+ "epoch": 366.0,
+ "eval_accuracy": 0.9245474883058775,
+ "eval_auc": 0.9527021767531981,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7640699523052464,
+ "eval_f1_macro": 0.8595814265550925,
+ "eval_loss": 0.22903695702552795,
+ "eval_pr_auc": 0.7799769457420497,
+ "eval_precision": 0.753527751646284,
+ "eval_precision_macro": 0.8555854062558139,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7749113189293776,
+ "eval_recall_macro": 0.8637338027616044,
+ "eval_runtime": 0.2118,
+ "eval_samples_per_second": 769.509,
+ "eval_steps_per_second": 4.721,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6588
+ },
+ {
+ "epoch": 367.0,
+ "eval_accuracy": 0.9247000203376042,
+ "eval_auc": 0.9527121622971282,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.764659145081837,
+ "eval_f1_macro": 0.8599193797618125,
+ "eval_loss": 0.22902432084083557,
+ "eval_pr_auc": 0.7800307850119022,
+ "eval_precision": 0.7537593984962406,
+ "eval_precision_macro": 0.8557884149558164,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.864217517692272,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.072,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6606
+ },
+ {
+ "epoch": 368.0,
+ "eval_accuracy": 0.9248017083587553,
+ "eval_auc": 0.9527291941703031,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7649769585253456,
+ "eval_f1_macro": 0.8601085500794873,
+ "eval_loss": 0.22899393737316132,
+ "eval_pr_auc": 0.7800749822284204,
+ "eval_precision": 0.7540726817042607,
+ "eval_precision_macro": 0.85597540373147,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644089364817604,
+ "eval_runtime": 0.2331,
+ "eval_samples_per_second": 699.217,
+ "eval_steps_per_second": 4.29,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6624
+ },
+ {
+ "epoch": 369.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527588782921224,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650238473767885,
+ "eval_f1_macro": 0.8601491566364061,
+ "eval_loss": 0.22894835472106934,
+ "eval_pr_auc": 0.780237083741907,
+ "eval_precision": 0.7544684854186265,
+ "eval_precision_macro": 0.856146798082819,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.86430805913007,
+ "eval_runtime": 0.2655,
+ "eval_samples_per_second": 613.902,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6642
+ },
+ {
+ "epoch": 370.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527729125556185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650238473767885,
+ "eval_f1_macro": 0.8601491566364061,
+ "eval_loss": 0.22892294824123383,
+ "eval_pr_auc": 0.7803195480022762,
+ "eval_precision": 0.7544684854186265,
+ "eval_precision_macro": 0.856146798082819,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.86430805913007,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.873,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6660
+ },
+ {
+ "epoch": 371.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527883872289603,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650985378258105,
+ "eval_f1_macro": 0.8601851483463878,
+ "eval_loss": 0.22889479994773865,
+ "eval_pr_auc": 0.7804143746656889,
+ "eval_precision": 0.7543089940457537,
+ "eval_precision_macro": 0.8560948381043845,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644391169610264,
+ "eval_runtime": 0.2368,
+ "eval_samples_per_second": 688.489,
+ "eval_steps_per_second": 4.224,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6678
+ },
+ {
+ "epoch": 372.0,
+ "eval_accuracy": 0.924954240390482,
+ "eval_auc": 0.9528159885960027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7653418124006359,
+ "eval_f1_macro": 0.860338399996844,
+ "eval_loss": 0.22885586321353912,
+ "eval_pr_auc": 0.7805625189044384,
+ "eval_precision": 0.7547820633427407,
+ "eval_precision_macro": 0.8563339286918206,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644994779195585,
+ "eval_runtime": 0.1849,
+ "eval_samples_per_second": 881.352,
+ "eval_steps_per_second": 5.407,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6696
+ },
+ {
+ "epoch": 373.0,
+ "eval_accuracy": 0.9250559284116331,
+ "eval_auc": 0.9528519015171544,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7655852417302799,
+ "eval_f1_macro": 0.8604917251982311,
+ "eval_loss": 0.22878196835517883,
+ "eval_pr_auc": 0.7807742707975688,
+ "eval_precision": 0.7552557263884531,
+ "eval_precision_macro": 0.8565733155332836,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8645598388780904,
+ "eval_runtime": 0.2727,
+ "eval_samples_per_second": 597.724,
+ "eval_steps_per_second": 3.667,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6714
+ },
+ {
+ "epoch": 374.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9528673372605004,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657070144743121,
+ "eval_f1_macro": 0.8605684154038177,
+ "eval_loss": 0.22877708077430725,
+ "eval_pr_auc": 0.7808321396342274,
+ "eval_precision": 0.7554927809165097,
+ "eval_precision_macro": 0.85669312022406,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8645900193573565,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.842,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6732
+ },
+ {
+ "epoch": 375.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9528682131854067,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659303988558716,
+ "eval_f1_macro": 0.8606760610325117,
+ "eval_loss": 0.2287902534008026,
+ "eval_pr_auc": 0.7808487230478494,
+ "eval_precision": 0.7550125313283208,
+ "eval_precision_macro": 0.8565363700584309,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8649831928502261,
+ "eval_runtime": 0.1856,
+ "eval_samples_per_second": 878.251,
+ "eval_steps_per_second": 5.388,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6750
+ },
+ {
+ "epoch": 376.0,
+ "eval_accuracy": 0.9250050844010576,
+ "eval_auc": 0.9528953279275011,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7655380702591003,
+ "eval_f1_macro": 0.8604509839871686,
+ "eval_loss": 0.228745236992836,
+ "eval_pr_auc": 0.7809833435589818,
+ "eval_precision": 0.754858934169279,
+ "eval_precision_macro": 0.8564014297014619,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864660716229781,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.614,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6768
+ },
+ {
+ "epoch": 377.0,
+ "eval_accuracy": 0.9248017083587553,
+ "eval_auc": 0.9529040482465667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7651262505955216,
+ "eval_f1_macro": 0.8601804865980422,
+ "eval_loss": 0.2287396788597107,
+ "eval_pr_auc": 0.781048839864553,
+ "eval_precision": 0.7537546933667084,
+ "eval_precision_macro": 0.8558720042841312,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7768461786520477,
+ "eval_recall_macro": 0.8646710521436733,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.891,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6786
+ },
+ {
+ "epoch": 378.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9529144620204507,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7653968253968254,
+ "eval_f1_macro": 0.8603288764349426,
+ "eval_loss": 0.22873102128505707,
+ "eval_pr_auc": 0.7810643100928254,
+ "eval_precision": 0.7536730228196311,
+ "eval_precision_macro": 0.8558880628094148,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8649633482848524,
+ "eval_runtime": 0.1843,
+ "eval_samples_per_second": 884.276,
+ "eval_steps_per_second": 5.425,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6804
+ },
+ {
+ "epoch": 379.0,
+ "eval_accuracy": 0.924954240390482,
+ "eval_auc": 0.9529275424990492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657142857142857,
+ "eval_f1_macro": 0.8605178766021483,
+ "eval_loss": 0.22870197892189026,
+ "eval_pr_auc": 0.7811376412515475,
+ "eval_precision": 0.7539856205064083,
+ "eval_precision_macro": 0.8560747217232387,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8651547670743409,
+ "eval_runtime": 0.2724,
+ "eval_samples_per_second": 598.424,
+ "eval_steps_per_second": 3.671,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6822
+ },
+ {
+ "epoch": 380.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9529545793811519,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7660047656870532,
+ "eval_f1_macro": 0.8607118952674847,
+ "eval_loss": 0.22864677011966705,
+ "eval_pr_auc": 0.7812048860219004,
+ "eval_precision": 0.7548528490920476,
+ "eval_precision_macro": 0.8564843339790698,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8651142506811824,
+ "eval_runtime": 0.1856,
+ "eval_samples_per_second": 878.062,
+ "eval_steps_per_second": 5.387,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6840
+ },
+ {
+ "epoch": 381.0,
+ "eval_accuracy": 0.9250559284116331,
+ "eval_auc": 0.9529760687388493,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658087067047982,
+ "eval_f1_macro": 0.8605994081311654,
+ "eval_loss": 0.2286224663257599,
+ "eval_pr_auc": 0.7813066136655398,
+ "eval_precision": 0.7547760726589414,
+ "eval_precision_macro": 0.8564168678924449,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.86495301237096,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.403,
+ "eval_steps_per_second": 3.966,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6858
+ },
+ {
+ "epoch": 382.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9530162639595422,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7656324582338903,
+ "eval_f1_macro": 0.8605324858111449,
+ "eval_loss": 0.22855480015277863,
+ "eval_pr_auc": 0.7815011919374028,
+ "eval_precision": 0.7556532663316583,
+ "eval_precision_macro": 0.856745584876579,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8644589615264,
+ "eval_runtime": 0.267,
+ "eval_samples_per_second": 610.397,
+ "eval_steps_per_second": 3.745,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6876
+ },
+ {
+ "epoch": 383.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530368384623376,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658288259624563,
+ "eval_f1_macro": 0.8606451240251011,
+ "eval_loss": 0.22851014137268066,
+ "eval_pr_auc": 0.7815937686262128,
+ "eval_precision": 0.7557299843014129,
+ "eval_precision_macro": 0.8568129991882603,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646201998366225,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.766,
+ "eval_steps_per_second": 3.729,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6894
+ },
+ {
+ "epoch": 384.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530639240069352,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658288259624563,
+ "eval_f1_macro": 0.8606451240251011,
+ "eval_loss": 0.22846660017967224,
+ "eval_pr_auc": 0.7817366980630457,
+ "eval_precision": 0.7557299843014129,
+ "eval_precision_macro": 0.8568129991882603,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646201998366225,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.734,
+ "eval_steps_per_second": 3.986,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6912
+ },
+ {
+ "epoch": 385.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9530865131370146,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7656324582338903,
+ "eval_f1_macro": 0.8605324858111449,
+ "eval_loss": 0.22843268513679504,
+ "eval_pr_auc": 0.7818405401720232,
+ "eval_precision": 0.7556532663316583,
+ "eval_precision_macro": 0.856745584876579,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8644589615264,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.559,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6930
+ },
+ {
+ "epoch": 386.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530828926474026,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659033078880407,
+ "eval_f1_macro": 0.8606810172942987,
+ "eval_loss": 0.22846029698848724,
+ "eval_pr_auc": 0.781765942321457,
+ "eval_precision": 0.7555695010982115,
+ "eval_precision_macro": 0.8567605408530922,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864751257667579,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.151,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6948
+ },
+ {
+ "epoch": 387.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9530829315773984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7665184243964421,
+ "eval_f1_macro": 0.8610134494863566,
+ "eval_loss": 0.22847168147563934,
+ "eval_pr_auc": 0.7818060776753192,
+ "eval_precision": 0.7552425665101722,
+ "eval_precision_macro": 0.8567386267869261,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8654669077808935,
+ "eval_runtime": 0.2089,
+ "eval_samples_per_second": 780.429,
+ "eval_steps_per_second": 4.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6966
+ },
+ {
+ "epoch": 388.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.953101403860419,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668838391863976,
+ "eval_f1_macro": 0.861243571985536,
+ "eval_loss": 0.2284410148859024,
+ "eval_pr_auc": 0.7818375459402719,
+ "eval_precision": 0.7559523809523809,
+ "eval_precision_macro": 0.8570973363853918,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8655574492186915,
+ "eval_runtime": 0.2357,
+ "eval_samples_per_second": 691.702,
+ "eval_steps_per_second": 4.244,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6984
+ },
+ {
+ "epoch": 388.8888888888889,
+ "grad_norm": 17393.9921875,
+ "learning_rate": 1.4317094954644378e-07,
+ "loss": 0.1876,
+ "step": 7000
+ },
+ {
+ "epoch": 389.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9531112726143616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671537484116899,
+ "eval_f1_macro": 0.8613916441816318,
+ "eval_loss": 0.22843530774116516,
+ "eval_pr_auc": 0.7818710932290109,
+ "eval_precision": 0.755868544600939,
+ "eval_precision_macro": 0.8571123212290193,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8658497453598706,
+ "eval_runtime": 0.2628,
+ "eval_samples_per_second": 620.309,
+ "eval_steps_per_second": 3.806,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7002
+ },
+ {
+ "epoch": 390.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.9531541734697645,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7664175544601686,
+ "eval_f1_macro": 0.8609828565716282,
+ "eval_loss": 0.2283545583486557,
+ "eval_pr_auc": 0.7820972392670548,
+ "eval_precision": 0.7559598494353826,
+ "eval_precision_macro": 0.8570151188924486,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651039147672901,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.253,
+ "eval_steps_per_second": 3.854,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7020
+ },
+ {
+ "epoch": 391.0,
+ "eval_accuracy": 0.9253609924750864,
+ "eval_auc": 0.9531671274258764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7665394402035624,
+ "eval_f1_macro": 0.8610596014864338,
+ "eval_loss": 0.22832486033439636,
+ "eval_pr_auc": 0.7821584295330316,
+ "eval_precision": 0.7561970505177282,
+ "eval_precision_macro": 0.8571349914927091,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.865134095246556,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.118,
+ "eval_steps_per_second": 4.123,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7038
+ },
+ {
+ "epoch": 392.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.953186076601346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661469933184856,
+ "eval_f1_macro": 0.8608344648891975,
+ "eval_loss": 0.22829268872737885,
+ "eval_pr_auc": 0.7822636305739935,
+ "eval_precision": 0.756043956043956,
+ "eval_precision_macro": 0.8570003193433394,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864811618626111,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.357,
+ "eval_steps_per_second": 4.149,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7056
+ },
+ {
+ "epoch": 393.0,
+ "eval_accuracy": 0.9252084604433598,
+ "eval_auc": 0.9532213958400613,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.765950676213206,
+ "eval_f1_macro": 0.860721851071415,
+ "eval_loss": 0.2282164841890335,
+ "eval_pr_auc": 0.782458419213003,
+ "eval_precision": 0.7559673366834171,
+ "eval_precision_macro": 0.8569329524960401,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646503803158885,
+ "eval_runtime": 0.21,
+ "eval_samples_per_second": 776.053,
+ "eval_steps_per_second": 4.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7074
+ },
+ {
+ "epoch": 394.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9532409095004704,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657542966263526,
+ "eval_f1_macro": 0.8606092068875439,
+ "eval_loss": 0.22819304466247559,
+ "eval_pr_auc": 0.7825471573946872,
+ "eval_precision": 0.7558906691800189,
+ "eval_precision_macro": 0.8568655651025967,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.864489142005666,
+ "eval_runtime": 0.1952,
+ "eval_samples_per_second": 835.24,
+ "eval_steps_per_second": 5.124,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7092
+ },
+ {
+ "epoch": 395.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532573574237078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659980897803247,
+ "eval_f1_macro": 0.8607627043564902,
+ "eval_loss": 0.2281719297170639,
+ "eval_pr_auc": 0.7826366824043385,
+ "eval_precision": 0.7563659226658284,
+ "eval_precision_macro": 0.8571057489837905,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8645495029641981,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.649,
+ "eval_steps_per_second": 4.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7110
+ },
+ {
+ "epoch": 396.0,
+ "eval_accuracy": 0.9252084604433598,
+ "eval_auc": 0.9532628173556228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.765950676213206,
+ "eval_f1_macro": 0.860721851071415,
+ "eval_loss": 0.22817298769950867,
+ "eval_pr_auc": 0.7826246984855115,
+ "eval_precision": 0.7559673366834171,
+ "eval_precision_macro": 0.8569329524960401,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646503803158885,
+ "eval_runtime": 0.2265,
+ "eval_samples_per_second": 719.545,
+ "eval_steps_per_second": 4.414,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7128
+ },
+ {
+ "epoch": 397.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532649585053934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7662213740458015,
+ "eval_f1_macro": 0.8608703093903662,
+ "eval_loss": 0.22816696763038635,
+ "eval_pr_auc": 0.7826996340764835,
+ "eval_precision": 0.7558832758079699,
+ "eval_precision_macro": 0.8569477661729006,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7768461786520477,
+ "eval_recall_macro": 0.8649426764570676,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.822,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7146
+ },
+ {
+ "epoch": 398.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532878980054353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661469933184856,
+ "eval_f1_macro": 0.8608344648891975,
+ "eval_loss": 0.22812943160533905,
+ "eval_pr_auc": 0.7828570635819191,
+ "eval_precision": 0.756043956043956,
+ "eval_precision_macro": 0.8570003193433394,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864811618626111,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.707,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7164
+ },
+ {
+ "epoch": 399.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9533086671582098,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7660725652450668,
+ "eval_f1_macro": 0.860798596552099,
+ "eval_loss": 0.2281065434217453,
+ "eval_pr_auc": 0.7829274286728394,
+ "eval_precision": 0.7562048382029531,
+ "eval_precision_macro": 0.8570529802176428,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646805607951544,
+ "eval_runtime": 0.2483,
+ "eval_samples_per_second": 656.497,
+ "eval_steps_per_second": 4.028,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7182
+ },
+ {
+ "epoch": 400.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.953327003186245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661944930765557,
+ "eval_f1_macro": 0.8608753604764983,
+ "eval_loss": 0.22807644307613373,
+ "eval_pr_auc": 0.7830440116061308,
+ "eval_precision": 0.7564424890006285,
+ "eval_precision_macro": 0.8571730824234005,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8647107412744205,
+ "eval_runtime": 0.221,
+ "eval_samples_per_second": 737.532,
+ "eval_steps_per_second": 4.525,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7200
+ },
+ {
+ "epoch": 401.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.9533311297658027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7664175544601686,
+ "eval_f1_macro": 0.8609828565716282,
+ "eval_loss": 0.22808308899402618,
+ "eval_pr_auc": 0.7830491617637381,
+ "eval_precision": 0.7559598494353826,
+ "eval_precision_macro": 0.8570151188924486,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651039147672901,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.406,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7218
+ },
+ {
+ "epoch": 402.0,
+ "eval_accuracy": 0.9253609924750864,
+ "eval_auc": 0.9533512371086481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766390833863781,
+ "eval_f1_macro": 0.8609879862166538,
+ "eval_loss": 0.2280474752187729,
+ "eval_pr_auc": 0.7831121530747555,
+ "eval_precision": 0.7565190072258875,
+ "eval_precision_macro": 0.857240395332689,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864871979584643,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.294,
+ "eval_steps_per_second": 4.125,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7236
+ },
+ {
+ "epoch": 403.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533569987480307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.22804181277751923,
+ "eval_pr_auc": 0.7831234273165448,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.256,
+ "eval_samples_per_second": 636.798,
+ "eval_steps_per_second": 3.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7254
+ },
+ {
+ "epoch": 404.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9533826536152816,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669053301511536,
+ "eval_f1_macro": 0.861289946852225,
+ "eval_loss": 0.2279965728521347,
+ "eval_pr_auc": 0.78327266335354,
+ "eval_precision": 0.7569095477386935,
+ "eval_precision_macro": 0.8574950553544232,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8652246366843541,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.619,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7272
+ },
+ {
+ "epoch": 405.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533846682425657,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.22799374163150787,
+ "eval_pr_auc": 0.7832776890714148,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.577,
+ "eval_steps_per_second": 3.801,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7290
+ },
+ {
+ "epoch": 406.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533931647141554,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.2279902696609497,
+ "eval_pr_auc": 0.7833151045367318,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.2498,
+ "eval_samples_per_second": 652.534,
+ "eval_steps_per_second": 4.003,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7308
+ },
+ {
+ "epoch": 407.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9533809990904591,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7673235855054037,
+ "eval_f1_macro": 0.8615095109466251,
+ "eval_loss": 0.22801372408866882,
+ "eval_pr_auc": 0.7832695956978404,
+ "eval_precision": 0.7565026637417738,
+ "eval_precision_macro": 0.8574040902613707,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8657790484874461,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.354,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7326
+ },
+ {
+ "epoch": 408.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9533805708605049,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671058898237816,
+ "eval_f1_macro": 0.8613505657612415,
+ "eval_loss": 0.2280135303735733,
+ "eval_pr_auc": 0.7832983031509244,
+ "eval_precision": 0.7554721701063164,
+ "eval_precision_macro": 0.8569406995036744,
+ "eval_pred_class_0": 16470,
+ "eval_pred_class_1": 3198,
+ "eval_predicted_binding_ratio": 0.16259914582062232,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.865950622711561,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.9,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7344
+ },
+ {
+ "epoch": 409.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533953253289238,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7672276913305811,
+ "eval_f1_macro": 0.8614272726281818,
+ "eval_loss": 0.22799338400363922,
+ "eval_pr_auc": 0.7833637706556547,
+ "eval_precision": 0.7557084766969033,
+ "eval_precision_macro": 0.857060115344384,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.865980803190827,
+ "eval_runtime": 0.2998,
+ "eval_samples_per_second": 543.699,
+ "eval_steps_per_second": 3.336,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7362
+ },
+ {
+ "epoch": 410.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9534269948805303,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7672496025437202,
+ "eval_f1_macro": 0.8614738601594714,
+ "eval_loss": 0.22793784737586975,
+ "eval_pr_auc": 0.783492627588611,
+ "eval_precision": 0.7566635308874256,
+ "eval_precision_macro": 0.8574567123458305,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8656479906564896,
+ "eval_runtime": 0.2396,
+ "eval_samples_per_second": 680.309,
+ "eval_steps_per_second": 4.174,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7380
+ },
+ {
+ "epoch": 411.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9534421775789035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670535856256957,
+ "eval_f1_macro": 0.8613613920200376,
+ "eval_loss": 0.22790838778018951,
+ "eval_pr_auc": 0.7835790311478947,
+ "eval_precision": 0.7565872020075283,
+ "eval_precision_macro": 0.8573894747901719,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865486752346267,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.852,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7398
+ },
+ {
+ "epoch": 412.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9534503918080233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670535856256957,
+ "eval_f1_macro": 0.8613613920200376,
+ "eval_loss": 0.22789432108402252,
+ "eval_pr_auc": 0.7836065049249683,
+ "eval_precision": 0.7565872020075283,
+ "eval_precision_macro": 0.8573894747901719,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865486752346267,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.102,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7416
+ },
+ {
+ "epoch": 413.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9534621486667634,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669316375198728,
+ "eval_f1_macro": 0.8612846167990333,
+ "eval_loss": 0.2278737723827362,
+ "eval_pr_auc": 0.7836530325514856,
+ "eval_precision": 0.7563499529633114,
+ "eval_precision_macro": 0.857269581736829,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8654565718670011,
+ "eval_runtime": 0.2015,
+ "eval_samples_per_second": 809.132,
+ "eval_steps_per_second": 4.964,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7434
+ },
+ {
+ "epoch": 414.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9534808739947569,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668575063613231,
+ "eval_f1_macro": 0.8612488935825013,
+ "eval_loss": 0.22784681618213654,
+ "eval_pr_auc": 0.7837271248134856,
+ "eval_precision": 0.7565108252274867,
+ "eval_precision_macro": 0.8573222168125176,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653255140360445,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 608.938,
+ "eval_steps_per_second": 3.736,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7452
+ },
+ {
+ "epoch": 415.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535030056973854,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.2278076857328415,
+ "eval_pr_auc": 0.783829043372685,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2388,
+ "eval_samples_per_second": 682.596,
+ "eval_steps_per_second": 4.188,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7470
+ },
+ {
+ "epoch": 416.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9535103634665969,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669053301511536,
+ "eval_f1_macro": 0.861289946852225,
+ "eval_loss": 0.22778868675231934,
+ "eval_pr_auc": 0.7838714010488458,
+ "eval_precision": 0.7569095477386935,
+ "eval_precision_macro": 0.8574950553544232,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8652246366843541,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.742,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7488
+ },
+ {
+ "epoch": 416.6666666666667,
+ "grad_norm": 16683.39453125,
+ "learning_rate": 8.236268949930852e-08,
+ "loss": 0.186,
+ "step": 7500
+ },
+ {
+ "epoch": 417.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535206409854957,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22777557373046875,
+ "eval_pr_auc": 0.7839160806088992,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.595,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7506
+ },
+ {
+ "epoch": 418.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.953516261360965,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22778432071208954,
+ "eval_pr_auc": 0.7838825807566469,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.836,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7524
+ },
+ {
+ "epoch": 419.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.953526694599847,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22777114808559418,
+ "eval_pr_auc": 0.783922204343384,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2628,
+ "eval_samples_per_second": 620.269,
+ "eval_steps_per_second": 3.805,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7542
+ },
+ {
+ "epoch": 420.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535399113334307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22775039076805115,
+ "eval_pr_auc": 0.7839806097795337,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 608.883,
+ "eval_steps_per_second": 3.735,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7560
+ },
+ {
+ "epoch": 421.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535487289774859,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22773513197898865,
+ "eval_pr_auc": 0.7840321361391863,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2732,
+ "eval_samples_per_second": 596.635,
+ "eval_steps_per_second": 3.66,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7578
+ },
+ {
+ "epoch": 422.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9535455951128217,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668097281831188,
+ "eval_f1_macro": 0.8612078600062212,
+ "eval_loss": 0.22774243354797363,
+ "eval_pr_auc": 0.7840612126983214,
+ "eval_precision": 0.7561128526645768,
+ "eval_precision_macro": 0.8571497629022605,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865426391387735,
+ "eval_runtime": 0.1892,
+ "eval_samples_per_second": 861.347,
+ "eval_steps_per_second": 5.284,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7596
+ },
+ {
+ "epoch": 423.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9535476389376027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668097281831188,
+ "eval_f1_macro": 0.8612078600062212,
+ "eval_loss": 0.22774267196655273,
+ "eval_pr_auc": 0.7840628987467491,
+ "eval_precision": 0.7561128526645768,
+ "eval_precision_macro": 0.8571497629022605,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865426391387735,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.36,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7614
+ },
+ {
+ "epoch": 424.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9535528458245448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670057215511761,
+ "eval_f1_macro": 0.8613203162894483,
+ "eval_loss": 0.22773417830467224,
+ "eval_pr_auc": 0.7840834648119666,
+ "eval_precision": 0.756189282356628,
+ "eval_precision_macro": 0.8572170542389439,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8655876296979576,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.927,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7632
+ },
+ {
+ "epoch": 425.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9535637559558758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669316375198728,
+ "eval_f1_macro": 0.8612846167990333,
+ "eval_loss": 0.22771182656288147,
+ "eval_pr_auc": 0.7841366739811415,
+ "eval_precision": 0.7563499529633114,
+ "eval_precision_macro": 0.857269581736829,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8654565718670011,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.859,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7650
+ },
+ {
+ "epoch": 426.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535682523703939,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7673235855054037,
+ "eval_f1_macro": 0.8615095109466251,
+ "eval_loss": 0.22770953178405762,
+ "eval_pr_auc": 0.7841596739317596,
+ "eval_precision": 0.7565026637417738,
+ "eval_precision_macro": 0.8574040902613707,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8657790484874461,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.83,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7668
+ },
+ {
+ "epoch": 427.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535677852104438,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674714104193139,
+ "eval_f1_macro": 0.8615807415292696,
+ "eval_loss": 0.22770845890045166,
+ "eval_pr_auc": 0.7841620948509175,
+ "eval_precision": 0.7561815336463223,
+ "eval_precision_macro": 0.8572991684500658,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.866041164149359,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.948,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7686
+ },
+ {
+ "epoch": 428.0,
+ "eval_accuracy": 0.9256152125279642,
+ "eval_auc": 0.9535825980738566,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767445557145128,
+ "eval_f1_macro": 0.8615862980157476,
+ "eval_loss": 0.22767424583435059,
+ "eval_pr_auc": 0.7842346625186999,
+ "eval_precision": 0.7567398119122257,
+ "eval_precision_macro": 0.8575239295026598,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658092289667121,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.669,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7704
+ },
+ {
+ "epoch": 429.0,
+ "eval_accuracy": 0.9256152125279642,
+ "eval_auc": 0.9535961651774029,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767445557145128,
+ "eval_f1_macro": 0.8615862980157476,
+ "eval_loss": 0.22765418887138367,
+ "eval_pr_auc": 0.7843006481928805,
+ "eval_precision": 0.7567398119122257,
+ "eval_precision_macro": 0.8575239295026598,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658092289667121,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.28,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7722
+ },
+ {
+ "epoch": 430.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536132165155758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22762420773506165,
+ "eval_pr_auc": 0.7844000578756268,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2691,
+ "eval_samples_per_second": 605.732,
+ "eval_steps_per_second": 3.716,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7740
+ },
+ {
+ "epoch": 431.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536164866352254,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22761479020118713,
+ "eval_pr_auc": 0.7844231309752159,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.783,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7758
+ },
+ {
+ "epoch": 432.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536227348995558,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22760987281799316,
+ "eval_pr_auc": 0.7844561314285999,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2144,
+ "eval_samples_per_second": 760.142,
+ "eval_steps_per_second": 4.663,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7776
+ },
+ {
+ "epoch": 433.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536278736590051,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22760248184204102,
+ "eval_pr_auc": 0.7844848515258783,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2745,
+ "eval_samples_per_second": 593.759,
+ "eval_steps_per_second": 3.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7794
+ },
+ {
+ "epoch": 434.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536346669432771,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.22759221494197845,
+ "eval_pr_auc": 0.7845111553142384,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.216,
+ "eval_samples_per_second": 754.683,
+ "eval_steps_per_second": 4.63,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7812
+ },
+ {
+ "epoch": 435.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536369248830353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.2275882065296173,
+ "eval_pr_auc": 0.7845220230482824,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.083,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7830
+ },
+ {
+ "epoch": 436.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536390660328059,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7678855325914149,
+ "eval_f1_macro": 0.8618523468803471,
+ "eval_loss": 0.2275806963443756,
+ "eval_pr_auc": 0.7845372505398349,
+ "eval_precision": 0.7572906867356538,
+ "eval_precision_macro": 0.8578309735638339,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660308282354665,
+ "eval_runtime": 0.2506,
+ "eval_samples_per_second": 650.493,
+ "eval_steps_per_second": 3.991,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7848
+ },
+ {
+ "epoch": 437.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536480004668485,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.2275666743516922,
+ "eval_pr_auc": 0.7845807540266186,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.1853,
+ "eval_samples_per_second": 879.496,
+ "eval_steps_per_second": 5.396,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7866
+ },
+ {
+ "epoch": 438.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536552025160767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22755169868469238,
+ "eval_pr_auc": 0.7846223392518402,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2108,
+ "eval_samples_per_second": 773.198,
+ "eval_steps_per_second": 4.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7884
+ },
+ {
+ "epoch": 439.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.95366627809989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.2275334894657135,
+ "eval_pr_auc": 0.784667391259121,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.311,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7902
+ },
+ {
+ "epoch": 440.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536672708147835,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22753211855888367,
+ "eval_pr_auc": 0.7846795397266301,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2424,
+ "eval_samples_per_second": 672.466,
+ "eval_steps_per_second": 4.126,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7920
+ },
+ {
+ "epoch": 441.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536694119645541,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22753164172172546,
+ "eval_pr_auc": 0.7846730527925044,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.862,
+ "eval_steps_per_second": 3.778,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7938
+ },
+ {
+ "epoch": 442.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536803123633861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7677378300986318,
+ "eval_f1_macro": 0.8617811692096791,
+ "eval_loss": 0.22751472890377045,
+ "eval_pr_auc": 0.7847227034562287,
+ "eval_precision": 0.7576138147566719,
+ "eval_precision_macro": 0.8579369201187351,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657687125735536,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.172,
+ "eval_steps_per_second": 3.75,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7956
+ },
+ {
+ "epoch": 443.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536880691650549,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.22749866545200348,
+ "eval_pr_auc": 0.7847641543874231,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.2405,
+ "eval_samples_per_second": 677.868,
+ "eval_steps_per_second": 4.159,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7974
+ },
+ {
+ "epoch": 444.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536937626769448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.2274913638830185,
+ "eval_pr_auc": 0.7847876605630844,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.896,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7992
+ },
+ {
+ "epoch": 444.44444444444446,
+ "grad_norm": 19008.333984375,
+ "learning_rate": 3.72113927636733e-08,
+ "loss": 0.1854,
+ "step": 8000
+ },
+ {
+ "epoch": 445.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.953696935471605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748790681362152,
+ "eval_pr_auc": 0.7848011703648987,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.861,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8010
+ },
+ {
+ "epoch": 446.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536995243163275,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748683393001556,
+ "eval_pr_auc": 0.7848132010793348,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.648,
+ "eval_steps_per_second": 4.047,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8028
+ },
+ {
+ "epoch": 447.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537016265361022,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748340666294098,
+ "eval_pr_auc": 0.7848243277439235,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2047,
+ "eval_samples_per_second": 796.254,
+ "eval_steps_per_second": 4.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8046
+ },
+ {
+ "epoch": 448.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537030474809498,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.2274865061044693,
+ "eval_pr_auc": 0.7848313481583303,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.69,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8064
+ },
+ {
+ "epoch": 449.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537053443507039,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22748111188411713,
+ "eval_pr_auc": 0.7848426067264029,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2031,
+ "eval_samples_per_second": 802.733,
+ "eval_steps_per_second": 4.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8082
+ },
+ {
+ "epoch": 450.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537081765079003,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22747540473937988,
+ "eval_pr_auc": 0.7848573104950377,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2266,
+ "eval_samples_per_second": 719.273,
+ "eval_steps_per_second": 4.413,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8100
+ },
+ {
+ "epoch": 451.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537093638727732,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22747638821601868,
+ "eval_pr_auc": 0.7848599055730325,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.395,
+ "eval_steps_per_second": 3.966,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8118
+ },
+ {
+ "epoch": 452.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953713694834809,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746768593788147,
+ "eval_pr_auc": 0.7848753398216984,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2493,
+ "eval_samples_per_second": 653.726,
+ "eval_steps_per_second": 4.011,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8136
+ },
+ {
+ "epoch": 453.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537144345047297,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746726870536804,
+ "eval_pr_auc": 0.7848749928109487,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.248,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8154
+ },
+ {
+ "epoch": 454.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537162252845378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746579349040985,
+ "eval_pr_auc": 0.7848822771158814,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.74,
+ "eval_steps_per_second": 4.06,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8172
+ },
+ {
+ "epoch": 455.0,
+ "eval_accuracy": 0.9260219646125687,
+ "eval_auc": 0.9537124685399404,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7687172150691464,
+ "eval_f1_macro": 0.8623431740348002,
+ "eval_loss": 0.22747564315795898,
+ "eval_pr_auc": 0.7848568499525364,
+ "eval_precision": 0.7579937304075235,
+ "eval_precision_macro": 0.8582722627034582,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665749041246662,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.446,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8190
+ },
+ {
+ "epoch": 456.0,
+ "eval_accuracy": 0.9260219646125687,
+ "eval_auc": 0.9537148724671828,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7687172150691464,
+ "eval_f1_macro": 0.8623431740348002,
+ "eval_loss": 0.22747227549552917,
+ "eval_pr_auc": 0.7848699531009984,
+ "eval_precision": 0.7579937304075235,
+ "eval_precision_macro": 0.8582722627034582,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665749041246662,
+ "eval_runtime": 0.2171,
+ "eval_samples_per_second": 750.821,
+ "eval_steps_per_second": 4.606,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8208
+ },
+ {
+ "epoch": 457.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537148043396901,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747254371643066,
+ "eval_pr_auc": 0.784871702941944,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2395,
+ "eval_samples_per_second": 680.611,
+ "eval_steps_per_second": 4.176,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8226
+ },
+ {
+ "epoch": 458.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537153006971368,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747036814689636,
+ "eval_pr_auc": 0.7848700532008381,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.347,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8244
+ },
+ {
+ "epoch": 459.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537154856146172,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747208178043365,
+ "eval_pr_auc": 0.7848674676035483,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2162,
+ "eval_samples_per_second": 753.78,
+ "eval_steps_per_second": 4.624,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8262
+ },
+ {
+ "epoch": 460.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537164783295108,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22747138142585754,
+ "eval_pr_auc": 0.7848698987106607,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.29,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8280
+ },
+ {
+ "epoch": 461.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537188335942584,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.2274673730134964,
+ "eval_pr_auc": 0.7848777447449568,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.971,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8298
+ },
+ {
+ "epoch": 462.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537204881190811,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22746412456035614,
+ "eval_pr_auc": 0.7848871874390749,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2183,
+ "eval_samples_per_second": 746.701,
+ "eval_steps_per_second": 4.581,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8316
+ },
+ {
+ "epoch": 463.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537223956888766,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274623066186905,
+ "eval_pr_auc": 0.784896188141376,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.736,
+ "eval_steps_per_second": 3.741,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8334
+ },
+ {
+ "epoch": 464.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537243227236701,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274596393108368,
+ "eval_pr_auc": 0.7849067717237345,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.711,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8352
+ },
+ {
+ "epoch": 465.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537252959735659,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22745810449123383,
+ "eval_pr_auc": 0.7849091375774293,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.717,
+ "eval_steps_per_second": 3.863,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8370
+ },
+ {
+ "epoch": 466.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537274565883342,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274550497531891,
+ "eval_pr_auc": 0.7849133909574995,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.839,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8388
+ },
+ {
+ "epoch": 467.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537294225531237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274521142244339,
+ "eval_pr_auc": 0.7849233363238722,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.858,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8406
+ },
+ {
+ "epoch": 468.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537300065030612,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.227451354265213,
+ "eval_pr_auc": 0.7849296985834374,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.266,
+ "eval_samples_per_second": 612.691,
+ "eval_steps_per_second": 3.759,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8424
+ },
+ {
+ "epoch": 469.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.953731164670437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744858264923096,
+ "eval_pr_auc": 0.7849332304496031,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1831,
+ "eval_samples_per_second": 890.399,
+ "eval_steps_per_second": 5.463,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8442
+ },
+ {
+ "epoch": 470.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537301622230444,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22745059430599213,
+ "eval_pr_auc": 0.7849302221742441,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.282,
+ "eval_samples_per_second": 577.963,
+ "eval_steps_per_second": 3.546,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8460
+ },
+ {
+ "epoch": 471.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.95373253695279,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274470180273056,
+ "eval_pr_auc": 0.7849404295185345,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.882,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8478
+ },
+ {
+ "epoch": 472.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537337243176629,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744475305080414,
+ "eval_pr_auc": 0.7849440080112278,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.737,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8496
+ },
+ {
+ "epoch": 472.22222222222223,
+ "grad_norm": 16415.080078125,
+ "learning_rate": 9.409753403698373e-09,
+ "loss": 0.185,
+ "step": 8500
+ },
+ {
+ "epoch": 473.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537344250575877,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744259238243103,
+ "eval_pr_auc": 0.7849494179086262,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 642.078,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8514
+ },
+ {
+ "epoch": 474.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537351647275085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744259238243103,
+ "eval_pr_auc": 0.7849503915002546,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1794,
+ "eval_samples_per_second": 908.686,
+ "eval_steps_per_second": 5.575,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8532
+ },
+ {
+ "epoch": 475.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537357876074417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744180262088776,
+ "eval_pr_auc": 0.7849530625634429,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1819,
+ "eval_samples_per_second": 895.935,
+ "eval_steps_per_second": 5.497,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8550
+ },
+ {
+ "epoch": 476.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537367997873333,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274399697780609,
+ "eval_pr_auc": 0.7849568926584508,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 633.091,
+ "eval_steps_per_second": 3.884,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8568
+ },
+ {
+ "epoch": 477.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537370139023102,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274392694234848,
+ "eval_pr_auc": 0.784958754067368,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.802,
+ "eval_steps_per_second": 3.747,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8586
+ },
+ {
+ "epoch": 478.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537376562472413,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22743819653987885,
+ "eval_pr_auc": 0.7849582401594454,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2266,
+ "eval_samples_per_second": 719.444,
+ "eval_steps_per_second": 4.414,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8604
+ },
+ {
+ "epoch": 479.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537377925022268,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743773460388184,
+ "eval_pr_auc": 0.7849574280187238,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.921,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8622
+ },
+ {
+ "epoch": 480.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537384932421518,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743669152259827,
+ "eval_pr_auc": 0.7849621773766102,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.492,
+ "eval_steps_per_second": 3.776,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8640
+ },
+ {
+ "epoch": 481.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537382012671831,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743773460388184,
+ "eval_pr_auc": 0.7849637957606732,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.647,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8658
+ },
+ {
+ "epoch": 482.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537380066172038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.227437824010849,
+ "eval_pr_auc": 0.7849623241007161,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1901,
+ "eval_samples_per_second": 857.378,
+ "eval_steps_per_second": 5.26,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8676
+ },
+ {
+ "epoch": 483.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537382791271747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743763029575348,
+ "eval_pr_auc": 0.7849636397572854,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2674,
+ "eval_samples_per_second": 609.618,
+ "eval_steps_per_second": 3.74,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8694
+ },
+ {
+ "epoch": 484.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537387365546257,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743697464466095,
+ "eval_pr_auc": 0.7849651242159179,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.398,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8712
+ },
+ {
+ "epoch": 485.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537391745170787,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274360954761505,
+ "eval_pr_auc": 0.7849672499360805,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1892,
+ "eval_samples_per_second": 861.673,
+ "eval_steps_per_second": 5.286,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8730
+ },
+ {
+ "epoch": 486.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953739330237062,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743603587150574,
+ "eval_pr_auc": 0.784968007514094,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2358,
+ "eval_samples_per_second": 691.342,
+ "eval_steps_per_second": 4.241,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8748
+ },
+ {
+ "epoch": 487.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.95373934970206,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743603587150574,
+ "eval_pr_auc": 0.7849684807581643,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2101,
+ "eval_samples_per_second": 775.889,
+ "eval_steps_per_second": 4.76,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8766
+ },
+ {
+ "epoch": 488.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537393886320558,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274361550807953,
+ "eval_pr_auc": 0.7849697577570401,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.25,
+ "eval_samples_per_second": 652.0,
+ "eval_steps_per_second": 4.0,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8784
+ },
+ {
+ "epoch": 489.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537394859570454,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743597626686096,
+ "eval_pr_auc": 0.784969480914725,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1914,
+ "eval_samples_per_second": 851.633,
+ "eval_steps_per_second": 5.225,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8802
+ },
+ {
+ "epoch": 490.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395540845381,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274358570575714,
+ "eval_pr_auc": 0.784970228255774,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2687,
+ "eval_samples_per_second": 606.685,
+ "eval_steps_per_second": 3.722,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8820
+ },
+ {
+ "epoch": 491.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395443520391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743581235408783,
+ "eval_pr_auc": 0.7849691356929127,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.272,
+ "eval_samples_per_second": 599.26,
+ "eval_steps_per_second": 3.676,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8838
+ },
+ {
+ "epoch": 492.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537396222120308,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743573784828186,
+ "eval_pr_auc": 0.7849709563979171,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2531,
+ "eval_samples_per_second": 644.069,
+ "eval_steps_per_second": 3.951,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8856
+ },
+ {
+ "epoch": 493.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953739583282035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849695249246844,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.242,
+ "eval_samples_per_second": 673.593,
+ "eval_steps_per_second": 4.132,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8874
+ },
+ {
+ "epoch": 494.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395443520391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849692821960631,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.934,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8892
+ },
+ {
+ "epoch": 495.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395248870412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849694151476083,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2644,
+ "eval_samples_per_second": 616.534,
+ "eval_steps_per_second": 3.782,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8910
+ },
+ {
+ "epoch": 496.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537396027470327,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849695246002066,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.692,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8928
+ },
+ {
+ "epoch": 497.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953739563817037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849694727996652,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2737,
+ "eval_samples_per_second": 595.465,
+ "eval_steps_per_second": 3.653,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8946
+ },
+ {
+ "epoch": 498.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395832820348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849694859174267,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.524,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8964
+ },
+ {
+ "epoch": 499.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537396027470328,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274356633424759,
+ "eval_pr_auc": 0.784969582259249,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.57,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8982
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 9000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 500,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 6751958238244128.0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/training_args.bin b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8903358100d3be09ad49078090c6e572b3ddef68
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8982/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:499ba8a39afec206dd7194e2d216bf0be2633330bfcda3d90a12ddcbc04cdaca
+size 5368
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/config.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/config.json
@@ -0,0 +1,52 @@
+{
+ "architectures": [
+ "GloMeModelForTokenClassification"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "cdr_weight": 0.0,
+ "class_weights": [
+ 0.1,
+ 0.9
+ ],
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.1,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "use_glome": true,
+ "use_nsa": true,
+ "vocab_size": 36
+}
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/model.safetensors b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..426642c8e78b3fc8a910a2aa287de469af270abf
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:423cf42c264d2567fd289ade1e6c1f0e506d54000000d68db8e861743d892f63
+size 61385376
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/optimizer.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ad0d550d08ca48892fb9f60694caceeeee59c1f1
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84e048a6921928cf224d4195130617e539f1adeb7a4f1611cbdb14ccefd71846
+size 122881658
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/rng_state.pth b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c11cf2d89486615ca4d9ed2951aa5910e62fcb32
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09c0fb83ad832f6cdeeca05a528dfc9d2b3afad5151c7ac2d59232a6247360ef
+size 14244
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scaler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scheduler.pt b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c99a097a5743d551787f21bf50846f9e07dd1b2f
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d23023944b37031b4b7a09c8ee9683bdb1128842a447d5968d6407368b043a9
+size 1064
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/trainer_state.json b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..39a41914fca7c4ef3cb7ab594a14b89f74fb1db6
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/trainer_state.json
@@ -0,0 +1,12160 @@
+{
+ "best_global_step": 8856,
+ "best_metric": 0.7849709563979171,
+ "best_model_checkpoint": "./results/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-8856",
+ "epoch": 500.0,
+ "eval_steps": 500,
+ "global_step": 9000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.3401972747610332,
+ "eval_auc": 0.39064302367564674,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25900759435847653,
+ "eval_f1_macro": 0.3321798728791878,
+ "eval_loss": 1.0617570877075195,
+ "eval_pr_auc": 0.1212308124824295,
+ "eval_precision": 0.15736885928393005,
+ "eval_precision_macro": 0.49944165947453734,
+ "eval_pred_class_0": 5256,
+ "eval_pred_class_1": 14412,
+ "eval_predicted_binding_ratio": 0.7327638804148872,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.4991767473782156,
+ "eval_runtime": 0.304,
+ "eval_samples_per_second": 536.239,
+ "eval_steps_per_second": 3.29,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 18
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.341010778930242,
+ "eval_auc": 0.39081343973238586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2590750585948665,
+ "eval_f1_macro": 0.33285209231533375,
+ "eval_loss": 1.0604556798934937,
+ "eval_pr_auc": 0.12126612292918731,
+ "eval_precision": 0.1574485825458588,
+ "eval_precision_macro": 0.4995923731531417,
+ "eval_pred_class_0": 5276,
+ "eval_pred_class_1": 14392,
+ "eval_predicted_binding_ratio": 0.731747000203376,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.4993975193845588,
+ "eval_runtime": 0.2793,
+ "eval_samples_per_second": 583.516,
+ "eval_steps_per_second": 3.58,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 36
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.34223103518405534,
+ "eval_auc": 0.3911369382652214,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2591765446944969,
+ "eval_f1_macro": 0.33385837704253485,
+ "eval_loss": 1.058252215385437,
+ "eval_pr_auc": 0.12133107613942488,
+ "eval_precision": 0.15756858376270713,
+ "eval_precision_macro": 0.4998170849458089,
+ "eval_pred_class_0": 5306,
+ "eval_pred_class_1": 14362,
+ "eval_predicted_binding_ratio": 0.7302216798861094,
+ "eval_recall": 0.7297645920670751,
+ "eval_recall_macro": 0.49972867739407356,
+ "eval_runtime": 0.2676,
+ "eval_samples_per_second": 609.165,
+ "eval_steps_per_second": 3.737,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 54
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.3441122635753508,
+ "eval_auc": 0.3915867840995182,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.259556882103088,
+ "eval_f1_macro": 0.33544605079873757,
+ "eval_loss": 1.0551481246948242,
+ "eval_pr_auc": 0.12142208631760734,
+ "eval_precision": 0.15788003631031353,
+ "eval_precision_macro": 0.500391299247358,
+ "eval_pred_class_0": 5347,
+ "eval_pred_class_1": 14321,
+ "eval_predicted_binding_ratio": 0.7281370754525117,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.5005832394650029,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.796,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 72
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.3457392719137686,
+ "eval_auc": 0.39218283153314865,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2597791072250345,
+ "eval_f1_macro": 0.3367955302889021,
+ "eval_loss": 1.0511513948440552,
+ "eval_pr_auc": 0.12154600341235242,
+ "eval_precision": 0.15809003710705033,
+ "eval_precision_macro": 0.5007720380521324,
+ "eval_pred_class_0": 5385,
+ "eval_pred_class_1": 14283,
+ "eval_predicted_binding_ratio": 0.7262050030506406,
+ "eval_recall": 0.72815220896485,
+ "eval_recall_macro": 0.5011558413086458,
+ "eval_runtime": 0.2629,
+ "eval_samples_per_second": 620.078,
+ "eval_steps_per_second": 3.804,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 90
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.34873906853772624,
+ "eval_auc": 0.39291782012189097,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.259894840238054,
+ "eval_f1_macro": 0.33921701928376435,
+ "eval_loss": 1.0462485551834106,
+ "eval_pr_auc": 0.1216940029412557,
+ "eval_precision": 0.1583133887089962,
+ "eval_precision_macro": 0.5011632853468087,
+ "eval_pred_class_0": 5462,
+ "eval_pred_class_1": 14206,
+ "eval_predicted_binding_ratio": 0.722290014236323,
+ "eval_recall": 0.7252499193808449,
+ "eval_recall_macro": 0.5017569691067321,
+ "eval_runtime": 0.2393,
+ "eval_samples_per_second": 681.219,
+ "eval_steps_per_second": 4.179,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 108
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.3517897091722595,
+ "eval_auc": 0.3937714770704174,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2599407906193766,
+ "eval_f1_macro": 0.3416488972772128,
+ "eval_loss": 1.0405118465423584,
+ "eval_pr_auc": 0.12187498322705145,
+ "eval_precision": 0.1585020529520034,
+ "eval_precision_macro": 0.5014812682659693,
+ "eval_pred_class_0": 5542,
+ "eval_pred_class_1": 14126,
+ "eval_predicted_binding_ratio": 0.7182224933902787,
+ "eval_recall": 0.7220251531763947,
+ "eval_recall_macro": 0.5022572195531276,
+ "eval_runtime": 0.2765,
+ "eval_samples_per_second": 589.571,
+ "eval_steps_per_second": 3.617,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 126
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.354586129753915,
+ "eval_auc": 0.3947741191129793,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2591338858410179,
+ "eval_f1_macro": 0.3436917965373002,
+ "eval_loss": 1.0338975191116333,
+ "eval_pr_auc": 0.12208733120990471,
+ "eval_precision": 0.1581985320316397,
+ "eval_precision_macro": 0.5009271275952343,
+ "eval_pred_class_0": 5635,
+ "eval_pred_class_1": 14033,
+ "eval_predicted_binding_ratio": 0.713494000406752,
+ "eval_recall": 0.7158980973879394,
+ "eval_recall_macro": 0.5014270471245847,
+ "eval_runtime": 0.2385,
+ "eval_samples_per_second": 683.567,
+ "eval_steps_per_second": 4.194,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 144
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.3598230628431971,
+ "eval_auc": 0.39592411118975185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25913503971756396,
+ "eval_f1_macro": 0.3477761944928628,
+ "eval_loss": 1.0263975858688354,
+ "eval_pr_auc": 0.12233214426039367,
+ "eval_precision": 0.15848567727076435,
+ "eval_precision_macro": 0.5013938604573427,
+ "eval_pred_class_0": 5774,
+ "eval_pred_class_1": 13894,
+ "eval_predicted_binding_ratio": 0.70642668293675,
+ "eval_recall": 0.710093518219929,
+ "eval_recall_macro": 0.502176595531767,
+ "eval_runtime": 0.2745,
+ "eval_samples_per_second": 593.873,
+ "eval_steps_per_second": 3.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 162
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.3636872076469392,
+ "eval_auc": 0.3972021050928084,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25801861623288075,
+ "eval_f1_macro": 0.35051449303788773,
+ "eval_loss": 1.0180495977401733,
+ "eval_pr_auc": 0.12260540333611444,
+ "eval_precision": 0.15807060874618625,
+ "eval_precision_macro": 0.5006720376838353,
+ "eval_pred_class_0": 5902,
+ "eval_pred_class_1": 13766,
+ "eval_predicted_binding_ratio": 0.6999186495830791,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.5010628083511147,
+ "eval_runtime": 0.2708,
+ "eval_samples_per_second": 601.838,
+ "eval_steps_per_second": 3.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 180
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.3691275167785235,
+ "eval_auc": 0.39866621357342186,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.258604206500956,
+ "eval_f1_macro": 0.35478882891419483,
+ "eval_loss": 1.0087939500808716,
+ "eval_pr_auc": 0.12292033936081492,
+ "eval_precision": 0.1587092042537587,
+ "eval_precision_macro": 0.5016983780261003,
+ "eval_pred_class_0": 6033,
+ "eval_pred_class_1": 13635,
+ "eval_predicted_binding_ratio": 0.6932580841976815,
+ "eval_recall": 0.6978394066430184,
+ "eval_recall_macro": 0.5027194256611,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.126,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 198
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.3740593858043523,
+ "eval_auc": 0.4002664991794433,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25823944086280654,
+ "eval_f1_macro": 0.35841740282728696,
+ "eval_loss": 0.9987770318984985,
+ "eval_pr_auc": 0.12326823892822446,
+ "eval_precision": 0.15878778897451096,
+ "eval_precision_macro": 0.5017853397238077,
+ "eval_pred_class_0": 6172,
+ "eval_pred_class_1": 13496,
+ "eval_predicted_binding_ratio": 0.6861907667276794,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.5028947176998165,
+ "eval_runtime": 0.258,
+ "eval_samples_per_second": 631.708,
+ "eval_steps_per_second": 3.876,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 216
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.37980475899938987,
+ "eval_auc": 0.40207323055334293,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25811945018854154,
+ "eval_f1_macro": 0.36265804779890953,
+ "eval_loss": 0.987876832485199,
+ "eval_pr_auc": 0.12366119818610516,
+ "eval_precision": 0.15905854133873024,
+ "eval_precision_macro": 0.5021624301446299,
+ "eval_pred_class_0": 6327,
+ "eval_pred_class_1": 13341,
+ "eval_predicted_binding_ratio": 0.6783099450884685,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.5035528974067893,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.68,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 234
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.3867703884482408,
+ "eval_auc": 0.40404318566725245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2582866982350409,
+ "eval_f1_macro": 0.36779990383041317,
+ "eval_loss": 0.9760332107543945,
+ "eval_pr_auc": 0.12409453800524387,
+ "eval_precision": 0.1595744680851064,
+ "eval_precision_macro": 0.5028818867776484,
+ "eval_pred_class_0": 6508,
+ "eval_pred_class_1": 13160,
+ "eval_predicted_binding_ratio": 0.6691071791742933,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.5048043507851898,
+ "eval_runtime": 0.2717,
+ "eval_samples_per_second": 599.919,
+ "eval_steps_per_second": 3.68,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 252
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.39429530201342283,
+ "eval_auc": 0.40629005957398867,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2577107607950651,
+ "eval_f1_macro": 0.37306889008104693,
+ "eval_loss": 0.9632152915000916,
+ "eval_pr_auc": 0.12458802431940903,
+ "eval_precision": 0.15971578622181032,
+ "eval_precision_macro": 0.5029977740632862,
+ "eval_pred_class_0": 6720,
+ "eval_pred_class_1": 12948,
+ "eval_predicted_binding_ratio": 0.6583282489322758,
+ "eval_recall": 0.6668816510802967,
+ "eval_recall_macro": 0.5050772111259515,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.248,
+ "eval_steps_per_second": 3.952,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 270
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.4020744356314826,
+ "eval_auc": 0.408681470822737,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25738822935084615,
+ "eval_f1_macro": 0.37848134871797623,
+ "eval_loss": 0.9496278166770935,
+ "eval_pr_auc": 0.12511500176534787,
+ "eval_precision": 0.16003140950137418,
+ "eval_precision_macro": 0.5033533652151325,
+ "eval_pred_class_0": 6933,
+ "eval_pred_class_1": 12735,
+ "eval_predicted_binding_ratio": 0.6474984746796827,
+ "eval_recall": 0.6572073524669462,
+ "eval_recall_macro": 0.5057630895249562,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 606.014,
+ "eval_steps_per_second": 3.718,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 288
+ },
+ {
+ "epoch": 17.0,
+ "eval_accuracy": 0.408989221069758,
+ "eval_auc": 0.4113766625614338,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.255349135169763,
+ "eval_f1_macro": 0.38271123621844805,
+ "eval_loss": 0.9350630640983582,
+ "eval_pr_auc": 0.1257149536327416,
+ "eval_precision": 0.15932528579422817,
+ "eval_precision_macro": 0.5022775332449281,
+ "eval_pred_class_0": 7159,
+ "eval_pred_class_1": 12509,
+ "eval_predicted_binding_ratio": 0.6360077282896075,
+ "eval_recall": 0.6426959045469204,
+ "eval_recall_macro": 0.5039700323120913,
+ "eval_runtime": 0.2687,
+ "eval_samples_per_second": 606.61,
+ "eval_steps_per_second": 3.722,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 306
+ },
+ {
+ "epoch": 18.0,
+ "eval_accuracy": 0.4161582265609111,
+ "eval_auc": 0.41440477389195646,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2538176619663396,
+ "eval_f1_macro": 0.3871501973338609,
+ "eval_loss": 0.9196970462799072,
+ "eval_pr_auc": 0.12640556118775828,
+ "eval_precision": 0.158935546875,
+ "eval_precision_macro": 0.5016899956597223,
+ "eval_pred_class_0": 7380,
+ "eval_pred_class_1": 12288,
+ "eval_predicted_binding_ratio": 0.62477120195241,
+ "eval_recall": 0.6297968397291196,
+ "eval_recall_macro": 0.5029831666503388,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.105,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 324
+ },
+ {
+ "epoch": 19.0,
+ "eval_accuracy": 0.4237848281472443,
+ "eval_auc": 0.4176888499450513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25100786464873437,
+ "eval_f1_macro": 0.3913994084656603,
+ "eval_loss": 0.9033117294311523,
+ "eval_pr_auc": 0.12714405405007598,
+ "eval_precision": 0.15785536159600996,
+ "eval_precision_macro": 0.5002421610284318,
+ "eval_pred_class_0": 7638,
+ "eval_pred_class_1": 12030,
+ "eval_predicted_binding_ratio": 0.611653447223917,
+ "eval_recall": 0.6123831022250886,
+ "eval_recall_macro": 0.5004331156685895,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.078,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 342
+ },
+ {
+ "epoch": 20.0,
+ "eval_accuracy": 0.4312080536912752,
+ "eval_auc": 0.4212995000006521,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24904343156340203,
+ "eval_f1_macro": 0.39564573885956833,
+ "eval_loss": 0.8857852220535278,
+ "eval_pr_auc": 0.12799421494868934,
+ "eval_precision": 0.1572566971854866,
+ "eval_precision_macro": 0.49948708843014167,
+ "eval_pred_class_0": 7872,
+ "eval_pred_class_1": 11796,
+ "eval_predicted_binding_ratio": 0.5997559487492373,
+ "eval_recall": 0.5981941309255079,
+ "eval_recall_macro": 0.4990729210793411,
+ "eval_runtime": 0.2709,
+ "eval_samples_per_second": 601.789,
+ "eval_steps_per_second": 3.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 360
+ },
+ {
+ "epoch": 21.0,
+ "eval_accuracy": 0.4394956274150905,
+ "eval_auc": 0.42538333442304876,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24493150684931506,
+ "eval_f1_macro": 0.399632635701501,
+ "eval_loss": 0.8671084642410278,
+ "eval_pr_auc": 0.12894871744717554,
+ "eval_precision": 0.15549178189407775,
+ "eval_precision_macro": 0.4973810972146359,
+ "eval_pred_class_0": 8169,
+ "eval_pred_class_1": 11499,
+ "eval_predicted_binding_ratio": 0.5846552776082977,
+ "eval_recall": 0.5765881973556917,
+ "eval_recall_macro": 0.4952114645256155,
+ "eval_runtime": 0.2681,
+ "eval_samples_per_second": 607.95,
+ "eval_steps_per_second": 3.73,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 378
+ },
+ {
+ "epoch": 22.0,
+ "eval_accuracy": 0.44824079723408583,
+ "eval_auc": 0.42976391273864795,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24080033580523297,
+ "eval_f1_macro": 0.4037241835563183,
+ "eval_loss": 0.8476783633232117,
+ "eval_pr_auc": 0.13001972671009082,
+ "eval_precision": 0.15375681229339766,
+ "eval_precision_macro": 0.495462476943159,
+ "eval_pred_class_0": 8475,
+ "eval_pred_class_1": 11193,
+ "eval_predicted_binding_ratio": 0.5690970103721782,
+ "eval_recall": 0.5549822637858756,
+ "eval_recall_macro": 0.491621632285284,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.096,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 396
+ },
+ {
+ "epoch": 23.0,
+ "eval_accuracy": 0.46044335977221884,
+ "eval_auc": 0.4345819960798662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23819095477386934,
+ "eval_f1_macro": 0.4102471738365922,
+ "eval_loss": 0.8271914720535278,
+ "eval_pr_auc": 0.1312038077210987,
+ "eval_precision": 0.15319974143503556,
+ "eval_precision_macro": 0.49502955733365084,
+ "eval_pred_class_0": 8839,
+ "eval_pred_class_1": 10829,
+ "eval_predicted_binding_ratio": 0.5505897905226764,
+ "eval_recall": 0.5349887133182845,
+ "eval_recall_macro": 0.4907393617898237,
+ "eval_runtime": 0.2699,
+ "eval_samples_per_second": 603.817,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 414
+ },
+ {
+ "epoch": 24.0,
+ "eval_accuracy": 0.4719849501728696,
+ "eval_auc": 0.4399397854182523,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2337489854644728,
+ "eval_f1_macro": 0.4154821023975197,
+ "eval_loss": 0.8054794669151306,
+ "eval_pr_auc": 0.13253606290408437,
+ "eval_precision": 0.1515499425947187,
+ "eval_precision_macro": 0.4934724539362483,
+ "eval_pred_class_0": 9216,
+ "eval_pred_class_1": 10452,
+ "eval_predicted_binding_ratio": 0.5314215985356925,
+ "eval_recall": 0.5108029667849081,
+ "eval_recall_macro": 0.48776099326147077,
+ "eval_runtime": 0.2459,
+ "eval_samples_per_second": 662.968,
+ "eval_steps_per_second": 4.067,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 432
+ },
+ {
+ "epoch": 25.0,
+ "eval_accuracy": 0.48713646532438476,
+ "eval_auc": 0.4457222328836341,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23099794160250056,
+ "eval_f1_macro": 0.42313846887516615,
+ "eval_loss": 0.7826969027519226,
+ "eval_pr_auc": 0.13403779679155806,
+ "eval_precision": 0.15125798722044728,
+ "eval_precision_macro": 0.4934698556077371,
+ "eval_pred_class_0": 9652,
+ "eval_pred_class_1": 10016,
+ "eval_predicted_binding_ratio": 0.5092536099247509,
+ "eval_recall": 0.48855207997420186,
+ "eval_recall_macro": 0.48771178574674356,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.772,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 450
+ },
+ {
+ "epoch": 26.0,
+ "eval_accuracy": 0.506152125279642,
+ "eval_auc": 0.452125351005008,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22648721828462212,
+ "eval_f1_macro": 0.43188881620754876,
+ "eval_loss": 0.7587484121322632,
+ "eval_pr_auc": 0.13570124162691763,
+ "eval_precision": 0.15038071065989847,
+ "eval_precision_macro": 0.492983148122742,
+ "eval_pred_class_0": 10212,
+ "eval_pred_class_1": 9456,
+ "eval_predicted_binding_ratio": 0.4807809640024405,
+ "eval_recall": 0.4585617542728152,
+ "eval_recall_macro": 0.48681090671327726,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.566,
+ "eval_steps_per_second": 3.752,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 468
+ },
+ {
+ "epoch": 27.0,
+ "eval_accuracy": 0.529997966239577,
+ "eval_auc": 0.4588746151842906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22371514947934162,
+ "eval_f1_macro": 0.4433436473661838,
+ "eval_loss": 0.7342172861099243,
+ "eval_pr_auc": 0.13749280910612846,
+ "eval_precision": 0.15124332916997843,
+ "eval_precision_macro": 0.4941834913044441,
+ "eval_pred_class_0": 10861,
+ "eval_pred_class_1": 8807,
+ "eval_predicted_binding_ratio": 0.44778320113890585,
+ "eval_recall": 0.4295388584327636,
+ "eval_recall_macro": 0.4891703467029515,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.173,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 486
+ },
+ {
+ "epoch": 27.77777777777778,
+ "grad_norm": 191838.453125,
+ "learning_rate": 5.544444444444443e-07,
+ "loss": 0.954,
+ "step": 500
+ },
+ {
+ "epoch": 28.0,
+ "eval_accuracy": 0.5579621720561317,
+ "eval_auc": 0.46628288633295734,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22208303507516106,
+ "eval_f1_macro": 0.4566736198103078,
+ "eval_loss": 0.7085328102111816,
+ "eval_pr_auc": 0.1397221721421834,
+ "eval_precision": 0.15368421052631578,
+ "eval_precision_macro": 0.4966212823527809,
+ "eval_pred_class_0": 11593,
+ "eval_pred_class_1": 8075,
+ "eval_predicted_binding_ratio": 0.4105653853976002,
+ "eval_recall": 0.400193485972267,
+ "eval_recall_macro": 0.49384334768221605,
+ "eval_runtime": 0.245,
+ "eval_samples_per_second": 665.264,
+ "eval_steps_per_second": 4.081,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 504
+ },
+ {
+ "epoch": 29.0,
+ "eval_accuracy": 0.5890278625177954,
+ "eval_auc": 0.47432292318642716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21926011784023955,
+ "eval_f1_macro": 0.4701862470303913,
+ "eval_loss": 0.6820237636566162,
+ "eval_pr_auc": 0.1419831923592407,
+ "eval_precision": 0.15650854936569222,
+ "eval_precision_macro": 0.4990822386003719,
+ "eval_pred_class_0": 12416,
+ "eval_pred_class_1": 7252,
+ "eval_predicted_binding_ratio": 0.36872076469391907,
+ "eval_recall": 0.36601096420509516,
+ "eval_recall_macro": 0.49839149043235986,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 681.884,
+ "eval_steps_per_second": 4.183,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 522
+ },
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.62385600976205,
+ "eval_auc": 0.4829859859606367,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21797040169133192,
+ "eval_f1_macro": 0.4851734455906117,
+ "eval_loss": 0.6552286148071289,
+ "eval_pr_auc": 0.144533301478986,
+ "eval_precision": 0.16213241075640825,
+ "eval_precision_macro": 0.5032992807407407,
+ "eval_pred_class_0": 13309,
+ "eval_pred_class_1": 6359,
+ "eval_predicted_binding_ratio": 0.32331706324994913,
+ "eval_recall": 0.3324733956788133,
+ "eval_recall_macro": 0.5054351043101014,
+ "eval_runtime": 0.2289,
+ "eval_samples_per_second": 712.13,
+ "eval_steps_per_second": 4.369,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 540
+ },
+ {
+ "epoch": 31.0,
+ "eval_accuracy": 0.6593959731543624,
+ "eval_auc": 0.4923438323703967,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21308586867144366,
+ "eval_f1_macro": 0.49787408315316334,
+ "eval_loss": 0.6283431053161621,
+ "eval_pr_auc": 0.14738118302130468,
+ "eval_precision": 0.16759053954175906,
+ "eval_precision_macro": 0.5068452136541568,
+ "eval_pred_class_0": 14256,
+ "eval_pred_class_1": 5412,
+ "eval_predicted_binding_ratio": 0.2751677852348993,
+ "eval_recall": 0.2924862947436311,
+ "eval_recall_macro": 0.5102800882784371,
+ "eval_runtime": 0.2716,
+ "eval_samples_per_second": 600.185,
+ "eval_steps_per_second": 3.682,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 558
+ },
+ {
+ "epoch": 32.0,
+ "eval_accuracy": 0.6967154769168192,
+ "eval_auc": 0.5026651961769109,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21316449017280042,
+ "eval_f1_macro": 0.5126600281126953,
+ "eval_loss": 0.6013967990875244,
+ "eval_pr_auc": 0.15068393162958252,
+ "eval_precision": 0.18035714285714285,
+ "eval_precision_macro": 0.5146913446706046,
+ "eval_pred_class_0": 15188,
+ "eval_pred_class_1": 4480,
+ "eval_predicted_binding_ratio": 0.2277811673784828,
+ "eval_recall": 0.2605611093195743,
+ "eval_recall_macro": 0.5194578347949956,
+ "eval_runtime": 0.2705,
+ "eval_samples_per_second": 602.56,
+ "eval_steps_per_second": 3.697,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 576
+ },
+ {
+ "epoch": 33.0,
+ "eval_accuracy": 0.7287472035794184,
+ "eval_auc": 0.5136348320064595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20692730786383232,
+ "eval_f1_macro": 0.5216610840892347,
+ "eval_loss": 0.575495719909668,
+ "eval_pr_auc": 0.15441446935423722,
+ "eval_precision": 0.19194704908990623,
+ "eval_precision_macro": 0.5210140431835268,
+ "eval_pred_class_0": 16042,
+ "eval_pred_class_1": 3626,
+ "eval_predicted_binding_ratio": 0.18436038234695953,
+ "eval_recall": 0.22444372782973235,
+ "eval_recall_macro": 0.5237930596654548,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.887,
+ "eval_steps_per_second": 3.778,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 594
+ },
+ {
+ "epoch": 34.0,
+ "eval_accuracy": 0.7577791336180598,
+ "eval_auc": 0.5256758602512032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20013431833445267,
+ "eval_f1_macro": 0.5287070633014385,
+ "eval_loss": 0.5502753853797913,
+ "eval_pr_auc": 0.15881070257620672,
+ "eval_precision": 0.20875656742556917,
+ "eval_precision_macro": 0.5298823579410603,
+ "eval_pred_class_0": 16813,
+ "eval_pred_class_1": 2855,
+ "eval_predicted_binding_ratio": 0.14515965019320723,
+ "eval_recall": 0.19219606578523057,
+ "eval_recall_macro": 0.5279203302306971,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.974,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 612
+ },
+ {
+ "epoch": 35.0,
+ "eval_accuracy": 0.7844213951596501,
+ "eval_auc": 0.5388391234856937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.19575113808801214,
+ "eval_f1_macro": 0.5356397775926205,
+ "eval_loss": 0.5265588164329529,
+ "eval_pr_auc": 0.16395620275178963,
+ "eval_precision": 0.23767848917549517,
+ "eval_precision_macro": 0.5449694383352471,
+ "eval_pred_class_0": 17497,
+ "eval_pred_class_1": 2171,
+ "eval_predicted_binding_ratio": 0.11038234695952817,
+ "eval_recall": 0.16639793614962914,
+ "eval_recall_macro": 0.5332502748895668,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.866,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 630
+ },
+ {
+ "epoch": 36.0,
+ "eval_accuracy": 0.8094366483628228,
+ "eval_auc": 0.5531888075405533,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.19189305735230702,
+ "eval_f1_macro": 0.5419376520838427,
+ "eval_loss": 0.5044229626655579,
+ "eval_pr_auc": 0.16987983494600534,
+ "eval_precision": 0.28952504879635654,
+ "eval_precision_macro": 0.5715178054086024,
+ "eval_pred_class_0": 18131,
+ "eval_pred_class_1": 1537,
+ "eval_predicted_binding_ratio": 0.07814724425462681,
+ "eval_recall": 0.14350209609803288,
+ "eval_recall_macro": 0.5387939646905326,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.446,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 648
+ },
+ {
+ "epoch": 37.0,
+ "eval_accuracy": 0.827791336180598,
+ "eval_auc": 0.5689342779333475,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18796451690242147,
+ "eval_f1_macro": 0.5458235779450257,
+ "eval_loss": 0.4842270016670227,
+ "eval_pr_auc": 0.177326879876991,
+ "eval_precision": 0.3663551401869159,
+ "eval_precision_macro": 0.6103471582212137,
+ "eval_pred_class_0": 18598,
+ "eval_pred_class_1": 1070,
+ "eval_predicted_binding_ratio": 0.05440309131584299,
+ "eval_recall": 0.12641083521444696,
+ "eval_recall_macro": 0.5427430526648682,
+ "eval_runtime": 0.2392,
+ "eval_samples_per_second": 681.483,
+ "eval_steps_per_second": 4.181,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 666
+ },
+ {
+ "epoch": 38.0,
+ "eval_accuracy": 0.8386719544437665,
+ "eval_auc": 0.5868017348062602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1820056715648363,
+ "eval_f1_macro": 0.5462584975699354,
+ "eval_loss": 0.46564891934394836,
+ "eval_pr_auc": 0.18685168882837525,
+ "eval_precision": 0.4537275064267352,
+ "eval_precision_macro": 0.6541268553838282,
+ "eval_pred_class_0": 18890,
+ "eval_pred_class_1": 778,
+ "eval_predicted_binding_ratio": 0.039556640227781166,
+ "eval_recall": 0.11383424701709126,
+ "eval_recall_macro": 0.5440904198204911,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.584,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 684
+ },
+ {
+ "epoch": 39.0,
+ "eval_accuracy": 0.8441631075859264,
+ "eval_auc": 0.6057814605899876,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17982338774417983,
+ "eval_f1_macro": 0.5468627318225942,
+ "eval_loss": 0.4492926001548767,
+ "eval_pr_auc": 0.19848375437748741,
+ "eval_precision": 0.5283018867924528,
+ "eval_precision_macro": 0.6915101279275421,
+ "eval_pred_class_0": 19032,
+ "eval_pred_class_1": 636,
+ "eval_predicted_binding_ratio": 0.03233679072605247,
+ "eval_recall": 0.10835214446952596,
+ "eval_recall_macro": 0.5451219284549598,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.049,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 702
+ },
+ {
+ "epoch": 40.0,
+ "eval_accuracy": 0.8456375838926175,
+ "eval_auc": 0.6262280880815292,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1758957654723127,
+ "eval_f1_macro": 0.5453696262568565,
+ "eval_loss": 0.43461790680885315,
+ "eval_pr_auc": 0.21275175506055685,
+ "eval_precision": 0.5557461406518011,
+ "eval_precision_macro": 0.7051195990133514,
+ "eval_pred_class_0": 19085,
+ "eval_pred_class_1": 583,
+ "eval_predicted_binding_ratio": 0.0296420581655481,
+ "eval_recall": 0.10448242502418574,
+ "eval_recall_macro": 0.544424468382196,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.873,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 720
+ },
+ {
+ "epoch": 41.0,
+ "eval_accuracy": 0.8483323164531218,
+ "eval_auc": 0.6481986497247736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18519530183010108,
+ "eval_f1_macro": 0.5507896621273841,
+ "eval_loss": 0.42107364535331726,
+ "eval_pr_auc": 0.23051421419341214,
+ "eval_precision": 0.6053571428571428,
+ "eval_precision_macro": 0.730405178085469,
+ "eval_pred_class_0": 19108,
+ "eval_pred_class_1": 560,
+ "eval_predicted_binding_ratio": 0.02847264592231035,
+ "eval_recall": 0.10931957433086101,
+ "eval_recall_macro": 0.5479899012476421,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.385,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 738
+ },
+ {
+ "epoch": 42.0,
+ "eval_accuracy": 0.8502135448444174,
+ "eval_auc": 0.6709813300109956,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20292207792207792,
+ "eval_f1_macro": 0.5601310726310726,
+ "eval_loss": 0.4084097743034363,
+ "eval_pr_auc": 0.25093797354762637,
+ "eval_precision": 0.6302521008403361,
+ "eval_precision_macro": 0.7436637739036264,
+ "eval_pred_class_0": 19073,
+ "eval_pred_class_1": 595,
+ "eval_predicted_binding_ratio": 0.03025218629245475,
+ "eval_recall": 0.12092873266688164,
+ "eval_recall_macro": 0.5538246608949184,
+ "eval_runtime": 0.2691,
+ "eval_samples_per_second": 605.746,
+ "eval_steps_per_second": 3.716,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 756
+ },
+ {
+ "epoch": 43.0,
+ "eval_accuracy": 0.8526540573520439,
+ "eval_auc": 0.6936772353365158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22263948497854077,
+ "eval_f1_macro": 0.5706266398157138,
+ "eval_loss": 0.39666271209716797,
+ "eval_pr_auc": 0.2738840395423864,
+ "eval_precision": 0.6618819776714514,
+ "eval_precision_macro": 0.7604089789622948,
+ "eval_pred_class_0": 19041,
+ "eval_pred_class_1": 627,
+ "eval_predicted_binding_ratio": 0.031879194630872486,
+ "eval_recall": 0.13382779748468235,
+ "eval_recall_macro": 0.5605156371379469,
+ "eval_runtime": 0.2268,
+ "eval_samples_per_second": 718.638,
+ "eval_steps_per_second": 4.409,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 774
+ },
+ {
+ "epoch": 44.0,
+ "eval_accuracy": 0.8544844417327638,
+ "eval_auc": 0.7158095511124275,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24325753569539926,
+ "eval_f1_macro": 0.58138013196988,
+ "eval_loss": 0.38576817512512207,
+ "eval_pr_auc": 0.29905525248581355,
+ "eval_precision": 0.6754772393538914,
+ "eval_precision_macro": 0.7681910344870789,
+ "eval_pred_class_0": 18987,
+ "eval_pred_class_1": 681,
+ "eval_predicted_binding_ratio": 0.03462477120195241,
+ "eval_recall": 0.14833924540470816,
+ "eval_recall_macro": 0.5674997367845657,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.061,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 792
+ },
+ {
+ "epoch": 45.0,
+ "eval_accuracy": 0.8575859263778727,
+ "eval_auc": 0.737084336405228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2808729139922978,
+ "eval_f1_macro": 0.60092007766148,
+ "eval_loss": 0.37560486793518066,
+ "eval_pr_auc": 0.3260256629572295,
+ "eval_precision": 0.6889168765743073,
+ "eval_precision_macro": 0.7767992245539758,
+ "eval_pred_class_0": 18874,
+ "eval_pred_class_1": 794,
+ "eval_predicted_binding_ratio": 0.040370144396990035,
+ "eval_recall": 0.1763947113834247,
+ "eval_recall_macro": 0.5807427773130077,
+ "eval_runtime": 0.2685,
+ "eval_samples_per_second": 607.1,
+ "eval_steps_per_second": 3.725,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 810
+ },
+ {
+ "epoch": 46.0,
+ "eval_accuracy": 0.8611450071181614,
+ "eval_auc": 0.7571642141385685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.32451150136037593,
+ "eval_f1_macro": 0.623565358817779,
+ "eval_loss": 0.36611661314964294,
+ "eval_pr_auc": 0.3532860869347882,
+ "eval_precision": 0.6963906581740976,
+ "eval_precision_macro": 0.7829117661264593,
+ "eval_pred_class_0": 18726,
+ "eval_pred_class_1": 942,
+ "eval_predicted_binding_ratio": 0.047895057962172055,
+ "eval_recall": 0.21154466301193164,
+ "eval_recall_macro": 0.5971407144358867,
+ "eval_runtime": 0.2698,
+ "eval_samples_per_second": 604.115,
+ "eval_steps_per_second": 3.706,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 828
+ },
+ {
+ "epoch": 47.0,
+ "eval_accuracy": 0.8646532438478747,
+ "eval_auc": 0.7759390708192488,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3694931312174325,
+ "eval_f1_macro": 0.6468414565354121,
+ "eval_loss": 0.3574466407299042,
+ "eval_pr_auc": 0.380672409235741,
+ "eval_precision": 0.695807314897413,
+ "eval_precision_macro": 0.7853328912870632,
+ "eval_pred_class_0": 18547,
+ "eval_pred_class_1": 1121,
+ "eval_predicted_binding_ratio": 0.05699613585519626,
+ "eval_recall": 0.25153176394711385,
+ "eval_recall_macro": 0.6154743385438473,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.868,
+ "eval_steps_per_second": 3.999,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 846
+ },
+ {
+ "epoch": 48.0,
+ "eval_accuracy": 0.8670937563555013,
+ "eval_auc": 0.7932697219796829,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4004587155963303,
+ "eval_f1_macro": 0.6628608765538834,
+ "eval_loss": 0.34963178634643555,
+ "eval_pr_auc": 0.40879055918048346,
+ "eval_precision": 0.69340746624305,
+ "eval_precision_macro": 0.7861898540406407,
+ "eval_pred_class_0": 18409,
+ "eval_pred_class_1": 1259,
+ "eval_predicted_binding_ratio": 0.06401260931462274,
+ "eval_recall": 0.2815220896485005,
+ "eval_recall_macro": 0.6291113798275701,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.281,
+ "eval_steps_per_second": 3.971,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 864
+ },
+ {
+ "epoch": 49.0,
+ "eval_accuracy": 0.8693308928208257,
+ "eval_auc": 0.8090460638591691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.43491644678979774,
+ "eval_f1_macro": 0.6805201987887128,
+ "eval_loss": 0.3424255847930908,
+ "eval_pr_auc": 0.43548439720530613,
+ "eval_precision": 0.6834830684174154,
+ "eval_precision_macro": 0.783786427463743,
+ "eval_pred_class_0": 18221,
+ "eval_pred_class_1": 1447,
+ "eval_predicted_binding_ratio": 0.07357128330282693,
+ "eval_recall": 0.3189293776201225,
+ "eval_recall_macro": 0.6456420293062284,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.097,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 882
+ },
+ {
+ "epoch": 50.0,
+ "eval_accuracy": 0.8711612772015457,
+ "eval_auc": 0.8231584209269593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.46153846153846156,
+ "eval_f1_macro": 0.6941824562962304,
+ "eval_loss": 0.3358187675476074,
+ "eval_pr_auc": 0.46013674866792464,
+ "eval_precision": 0.6766355140186916,
+ "eval_precision_macro": 0.7825407542966181,
+ "eval_pred_class_0": 18063,
+ "eval_pred_class_1": 1605,
+ "eval_predicted_binding_ratio": 0.0816046369737645,
+ "eval_recall": 0.35020960980328925,
+ "eval_recall_macro": 0.659441136162585,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.797,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 900
+ },
+ {
+ "epoch": 51.0,
+ "eval_accuracy": 0.8745169818995322,
+ "eval_auc": 0.8357514570475526,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.49363972096840375,
+ "eval_f1_macro": 0.7110123043354003,
+ "eval_loss": 0.32985639572143555,
+ "eval_pr_auc": 0.48277553791567623,
+ "eval_precision": 0.6785109983079526,
+ "eval_precision_macro": 0.7862239260888744,
+ "eval_pred_class_0": 17895,
+ "eval_pred_class_1": 1773,
+ "eval_predicted_binding_ratio": 0.0901464307504576,
+ "eval_recall": 0.38793937439535636,
+ "eval_recall_macro": 0.6767668140160521,
+ "eval_runtime": 0.2496,
+ "eval_samples_per_second": 653.0,
+ "eval_steps_per_second": 4.006,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 918
+ },
+ {
+ "epoch": 52.0,
+ "eval_accuracy": 0.8772117144600367,
+ "eval_auc": 0.8463606400457255,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.523574669560071,
+ "eval_f1_macro": 0.7265493507137326,
+ "eval_loss": 0.3246362507343292,
+ "eval_pr_auc": 0.5014434788718165,
+ "eval_precision": 0.6742886178861789,
+ "eval_precision_macro": 0.787031314592807,
+ "eval_pred_class_0": 17700,
+ "eval_pred_class_1": 1968,
+ "eval_predicted_binding_ratio": 0.10006101281269067,
+ "eval_recall": 0.4279264753305385,
+ "eval_recall_macro": 0.6946175504557563,
+ "eval_runtime": 0.2301,
+ "eval_samples_per_second": 708.248,
+ "eval_steps_per_second": 4.345,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 936
+ },
+ {
+ "epoch": 53.0,
+ "eval_accuracy": 0.878991254830181,
+ "eval_auc": 0.8556500280578212,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5445847684653655,
+ "eval_f1_macro": 0.7374052543587455,
+ "eval_loss": 0.3201504647731781,
+ "eval_pr_auc": 0.5184804467620471,
+ "eval_precision": 0.6696470588235294,
+ "eval_precision_macro": 0.786998185969936,
+ "eval_pred_class_0": 17543,
+ "eval_pred_class_1": 2125,
+ "eval_predicted_binding_ratio": 0.10804352247305267,
+ "eval_recall": 0.45888423089326025,
+ "eval_recall_macro": 0.7082554190018906,
+ "eval_runtime": 0.2666,
+ "eval_samples_per_second": 611.344,
+ "eval_steps_per_second": 3.751,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 954
+ },
+ {
+ "epoch": 54.0,
+ "eval_accuracy": 0.8792454748830588,
+ "eval_auc": 0.8636336358823378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5594509367464292,
+ "eval_f1_macro": 0.744742407539513,
+ "eval_loss": 0.31616976857185364,
+ "eval_pr_auc": 0.5331175601979875,
+ "eval_precision": 0.6585152838427948,
+ "eval_precision_macro": 0.7834238290545543,
+ "eval_pred_class_0": 17378,
+ "eval_pred_class_1": 2290,
+ "eval_predicted_binding_ratio": 0.11643278421801911,
+ "eval_recall": 0.48629474363108677,
+ "eval_recall_macro": 0.719546237029523,
+ "eval_runtime": 0.2694,
+ "eval_samples_per_second": 604.964,
+ "eval_steps_per_second": 3.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 972
+ },
+ {
+ "epoch": 55.0,
+ "eval_accuracy": 0.8814317673378076,
+ "eval_auc": 0.8703512791725087,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.578757225433526,
+ "eval_f1_macro": 0.7548815712966447,
+ "eval_loss": 0.31280621886253357,
+ "eval_pr_auc": 0.5453871030590061,
+ "eval_precision": 0.657905544147844,
+ "eval_precision_macro": 0.7854606348952531,
+ "eval_pred_class_0": 17233,
+ "eval_pred_class_1": 2435,
+ "eval_predicted_binding_ratio": 0.12380516575147447,
+ "eval_recall": 0.5166075459529185,
+ "eval_recall_macro": 0.7331634337478723,
+ "eval_runtime": 0.3648,
+ "eval_samples_per_second": 446.874,
+ "eval_steps_per_second": 2.742,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 990
+ },
+ {
+ "epoch": 55.55555555555556,
+ "grad_norm": 18517.669921875,
+ "learning_rate": 9.996314582053105e-07,
+ "loss": 0.4604,
+ "step": 1000
+ },
+ {
+ "epoch": 56.0,
+ "eval_accuracy": 0.8834146837502542,
+ "eval_auc": 0.8759527216222862,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5956621407159232,
+ "eval_f1_macro": 0.7637749289648232,
+ "eval_loss": 0.3100614845752716,
+ "eval_pr_auc": 0.5551596710183998,
+ "eval_precision": 0.6571984435797665,
+ "eval_precision_macro": 0.7873078426812156,
+ "eval_pred_class_0": 17098,
+ "eval_pred_class_1": 2570,
+ "eval_predicted_binding_ratio": 0.1306691071791743,
+ "eval_recall": 0.5446630119316349,
+ "eval_recall_macro": 0.745742503732462,
+ "eval_runtime": 0.2507,
+ "eval_samples_per_second": 650.302,
+ "eval_steps_per_second": 3.99,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1008
+ },
+ {
+ "epoch": 57.0,
+ "eval_accuracy": 0.8840756558877364,
+ "eval_auc": 0.8809651824326759,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6064894718674491,
+ "eval_f1_macro": 0.7692574960553631,
+ "eval_loss": 0.30748215317726135,
+ "eval_pr_auc": 0.5634298069700459,
+ "eval_precision": 0.6524322317118455,
+ "eval_precision_macro": 0.7866284869899434,
+ "eval_pred_class_0": 16975,
+ "eval_pred_class_1": 2693,
+ "eval_predicted_binding_ratio": 0.13692292047996746,
+ "eval_recall": 0.5665914221218962,
+ "eval_recall_macro": 0.7550467824679621,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.163,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1026
+ },
+ {
+ "epoch": 58.0,
+ "eval_accuracy": 0.8849908480780964,
+ "eval_auc": 0.8853361353068065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6121399176954733,
+ "eval_f1_macro": 0.7723127955239544,
+ "eval_loss": 0.30511021614074707,
+ "eval_pr_auc": 0.5712324508006517,
+ "eval_precision": 0.6536067374588063,
+ "eval_precision_macro": 0.7879535133831199,
+ "eval_pred_class_0": 16937,
+ "eval_pred_class_1": 2731,
+ "eval_predicted_binding_ratio": 0.13885499288183853,
+ "eval_recall": 0.5756207674943566,
+ "eval_recall_macro": 0.7592596503615321,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.515,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1044
+ },
+ {
+ "epoch": 59.0,
+ "eval_accuracy": 0.8853467561521253,
+ "eval_auc": 0.8892084338643703,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.616822429906542,
+ "eval_f1_macro": 0.7747051971959543,
+ "eval_loss": 0.3030014634132385,
+ "eval_pr_auc": 0.5778370115776272,
+ "eval_precision": 0.6519396551724138,
+ "eval_precision_macro": 0.7878864350252024,
+ "eval_pred_class_0": 16884,
+ "eval_pred_class_1": 2784,
+ "eval_predicted_binding_ratio": 0.1415497254423429,
+ "eval_recall": 0.5852950661077072,
+ "eval_recall_macro": 0.7634026486450891,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.008,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1062
+ },
+ {
+ "epoch": 60.0,
+ "eval_accuracy": 0.884685784014643,
+ "eval_auc": 0.8924710108272688,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6202277294038848,
+ "eval_f1_macro": 0.7761251343338811,
+ "eval_loss": 0.3011925220489502,
+ "eval_pr_auc": 0.5832812236308141,
+ "eval_precision": 0.6450714036920934,
+ "eval_precision_macro": 0.7853564436451774,
+ "eval_pred_class_0": 16797,
+ "eval_pred_class_1": 2871,
+ "eval_predicted_binding_ratio": 0.14597315436241612,
+ "eval_recall": 0.5972267010641729,
+ "eval_recall_macro": 0.7678594421600216,
+ "eval_runtime": 0.2437,
+ "eval_samples_per_second": 668.98,
+ "eval_steps_per_second": 4.104,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1080
+ },
+ {
+ "epoch": 61.0,
+ "eval_accuracy": 0.8856518202155786,
+ "eval_auc": 0.8954398707041407,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6242272347535506,
+ "eval_f1_macro": 0.7783964874556335,
+ "eval_loss": 0.29942846298217773,
+ "eval_pr_auc": 0.5886815510653964,
+ "eval_precision": 0.6477115117891817,
+ "eval_precision_macro": 0.7871243450270979,
+ "eval_pred_class_0": 16784,
+ "eval_pred_class_1": 2884,
+ "eval_predicted_binding_ratio": 0.14663412649989832,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.7705297965613797,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.921,
+ "eval_steps_per_second": 3.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1098
+ },
+ {
+ "epoch": 62.0,
+ "eval_accuracy": 0.8865161683953631,
+ "eval_auc": 0.8978366542923133,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6288659793814433,
+ "eval_f1_macro": 0.7809416026191173,
+ "eval_loss": 0.29785510897636414,
+ "eval_pr_auc": 0.593021329597711,
+ "eval_precision": 0.6491589426707861,
+ "eval_precision_macro": 0.7884708470441367,
+ "eval_pred_class_0": 16755,
+ "eval_pred_class_1": 2913,
+ "eval_predicted_binding_ratio": 0.14810860280658938,
+ "eval_recall": 0.6098032892615285,
+ "eval_recall_macro": 0.7740571948209012,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.719,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1116
+ },
+ {
+ "epoch": 63.0,
+ "eval_accuracy": 0.8871262965222697,
+ "eval_auc": 0.8998632136201572,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6315964155326916,
+ "eval_f1_macro": 0.7824748814379159,
+ "eval_loss": 0.2964444160461426,
+ "eval_pr_auc": 0.5970041919243015,
+ "eval_precision": 0.6505982905982906,
+ "eval_precision_macro": 0.789523000044412,
+ "eval_pred_class_0": 16743,
+ "eval_pred_class_1": 2925,
+ "eval_predicted_binding_ratio": 0.14871873093349602,
+ "eval_recall": 0.6136730087068688,
+ "eval_recall_macro": 0.7759920545435715,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.792,
+ "eval_steps_per_second": 3.71,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1134
+ },
+ {
+ "epoch": 64.0,
+ "eval_accuracy": 0.8876855806386008,
+ "eval_auc": 0.9017429582012333,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6356589147286822,
+ "eval_f1_macro": 0.7846343742639293,
+ "eval_loss": 0.2951850891113281,
+ "eval_pr_auc": 0.6005268804358049,
+ "eval_precision": 0.650573936529372,
+ "eval_precision_macro": 0.7901498917652248,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.7794694277584535,
+ "eval_runtime": 0.2675,
+ "eval_samples_per_second": 609.376,
+ "eval_steps_per_second": 3.739,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1152
+ },
+ {
+ "epoch": 65.0,
+ "eval_accuracy": 0.8881940207443563,
+ "eval_auc": 0.9033021142666618,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6377861966727063,
+ "eval_f1_macro": 0.785840340182137,
+ "eval_loss": 0.2939698398113251,
+ "eval_pr_auc": 0.6035211605243039,
+ "eval_precision": 0.6518518518518519,
+ "eval_precision_macro": 0.7910415086304414,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6243147371815544,
+ "eval_recall_macro": 0.7809507530297222,
+ "eval_runtime": 0.2684,
+ "eval_samples_per_second": 607.415,
+ "eval_steps_per_second": 3.726,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1170
+ },
+ {
+ "epoch": 66.0,
+ "eval_accuracy": 0.8882448647549319,
+ "eval_auc": 0.9048048023731414,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6389618922470434,
+ "eval_f1_macro": 0.7864263262967652,
+ "eval_loss": 0.29283198714256287,
+ "eval_pr_auc": 0.6066927627742578,
+ "eval_precision": 0.6511550050217609,
+ "eval_precision_macro": 0.7909273016835919,
+ "eval_pred_class_0": 16681,
+ "eval_pred_class_1": 2987,
+ "eval_predicted_binding_ratio": 0.1518710595891804,
+ "eval_recall": 0.6272170267655595,
+ "eval_recall_macro": 0.7821604539875966,
+ "eval_runtime": 0.2764,
+ "eval_samples_per_second": 589.68,
+ "eval_steps_per_second": 3.618,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1188
+ },
+ {
+ "epoch": 67.0,
+ "eval_accuracy": 0.888346552776083,
+ "eval_auc": 0.9061457752769495,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6402359108781127,
+ "eval_f1_macro": 0.7870775124924988,
+ "eval_loss": 0.29169291257858276,
+ "eval_pr_auc": 0.6096183698390041,
+ "eval_precision": 0.6506826506826506,
+ "eval_precision_macro": 0.7909278839971909,
+ "eval_pred_class_0": 16665,
+ "eval_pred_class_1": 3003,
+ "eval_predicted_binding_ratio": 0.15268456375838926,
+ "eval_recall": 0.6301193163495646,
+ "eval_recall_macro": 0.783400335424737,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.516,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1206
+ },
+ {
+ "epoch": 68.0,
+ "eval_accuracy": 0.8887533048606874,
+ "eval_auc": 0.9074730837522218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.641781270464964,
+ "eval_f1_macro": 0.7879665952661885,
+ "eval_loss": 0.2904839515686035,
+ "eval_pr_auc": 0.6127271933864005,
+ "eval_precision": 0.6518124376454939,
+ "eval_precision_macro": 0.7916645766644131,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6320541760722348,
+ "eval_recall_macro": 0.7844281262446042,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.958,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1224
+ },
+ {
+ "epoch": 69.0,
+ "eval_accuracy": 0.889261744966443,
+ "eval_auc": 0.9085174295528148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6431847968545217,
+ "eval_f1_macro": 0.788822778783544,
+ "eval_loss": 0.28959015011787415,
+ "eval_pr_auc": 0.6152976575518759,
+ "eval_precision": 0.6536796536796536,
+ "eval_precision_macro": 0.7926964124983926,
+ "eval_pred_class_0": 16665,
+ "eval_pred_class_1": 3003,
+ "eval_predicted_binding_ratio": 0.15268456375838926,
+ "eval_recall": 0.6330216059335698,
+ "eval_recall_macro": 0.7851231045301337,
+ "eval_runtime": 0.2452,
+ "eval_samples_per_second": 664.894,
+ "eval_steps_per_second": 4.079,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1242
+ },
+ {
+ "epoch": 70.0,
+ "eval_accuracy": 0.889363432987594,
+ "eval_auc": 0.9095093658465239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6446766819072501,
+ "eval_f1_macro": 0.7895790973067505,
+ "eval_loss": 0.2887136936187744,
+ "eval_pr_auc": 0.6176593727552148,
+ "eval_precision": 0.6529937148527952,
+ "eval_precision_macro": 0.7926428472131204,
+ "eval_pred_class_0": 16645,
+ "eval_pred_class_1": 3023,
+ "eval_predicted_binding_ratio": 0.15370144396990035,
+ "eval_recall": 0.636568848758465,
+ "eval_recall_macro": 0.7866251016291872,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.817,
+ "eval_steps_per_second": 3.796,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1260
+ },
+ {
+ "epoch": 71.0,
+ "eval_accuracy": 0.889821029082774,
+ "eval_auc": 0.9104295930879169,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6464349812367434,
+ "eval_f1_macro": 0.7905888279869988,
+ "eval_loss": 0.28783899545669556,
+ "eval_pr_auc": 0.619972501272285,
+ "eval_precision": 0.6542272126816381,
+ "eval_precision_macro": 0.7934597601869728,
+ "eval_pred_class_0": 16640,
+ "eval_pred_class_1": 3028,
+ "eval_predicted_binding_ratio": 0.1539556640227781,
+ "eval_recall": 0.6388261851015802,
+ "eval_recall_macro": 0.7878141307592769,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.334,
+ "eval_steps_per_second": 3.824,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1278
+ },
+ {
+ "epoch": 72.0,
+ "eval_accuracy": 0.8899735611145008,
+ "eval_auc": 0.911281293804153,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.646289637136319,
+ "eval_f1_macro": 0.7905721170208057,
+ "eval_loss": 0.28697267174720764,
+ "eval_pr_auc": 0.6225153426830469,
+ "eval_precision": 0.6552867086509778,
+ "eval_precision_macro": 0.7938916277024632,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.6375362786198001,
+ "eval_recall_macro": 0.7873804408732489,
+ "eval_runtime": 0.2764,
+ "eval_samples_per_second": 589.762,
+ "eval_steps_per_second": 3.618,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1296
+ },
+ {
+ "epoch": 73.0,
+ "eval_accuracy": 0.890736221273134,
+ "eval_auc": 0.911925575502615,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6505122784192552,
+ "eval_f1_macro": 0.7928790035842321,
+ "eval_loss": 0.2863345444202423,
+ "eval_pr_auc": 0.6235765349975187,
+ "eval_precision": 0.6561679790026247,
+ "eval_precision_macro": 0.7949612458190018,
+ "eval_pred_class_0": 16620,
+ "eval_pred_class_1": 3048,
+ "eval_predicted_binding_ratio": 0.1549725442342892,
+ "eval_recall": 0.6449532408900355,
+ "eval_recall_macro": 0.7908474781742385,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.574,
+ "eval_steps_per_second": 3.801,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1314
+ },
+ {
+ "epoch": 74.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9126060328997004,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6506692784851453,
+ "eval_f1_macro": 0.793115428161573,
+ "eval_loss": 0.28558436036109924,
+ "eval_pr_auc": 0.625919370718976,
+ "eval_precision": 0.6588429752066116,
+ "eval_precision_macro": 0.7961342196828587,
+ "eval_pred_class_0": 16643,
+ "eval_pred_class_1": 3025,
+ "eval_predicted_binding_ratio": 0.15380313199105144,
+ "eval_recall": 0.6426959045469204,
+ "eval_recall_macro": 0.7902016976709372,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.167,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1332
+ },
+ {
+ "epoch": 75.0,
+ "eval_accuracy": 0.8916005694529184,
+ "eval_auc": 0.9132873078266985,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6526555881394591,
+ "eval_f1_macro": 0.794217425975266,
+ "eval_loss": 0.28493690490722656,
+ "eval_pr_auc": 0.6278320531638758,
+ "eval_precision": 0.6595324333223576,
+ "eval_precision_macro": 0.7967555738856391,
+ "eval_pred_class_0": 16631,
+ "eval_pred_class_1": 3037,
+ "eval_predicted_binding_ratio": 0.1544132601179581,
+ "eval_recall": 0.6459206707513705,
+ "eval_recall_macro": 0.7917537198146302,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.773,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1350
+ },
+ {
+ "epoch": 76.0,
+ "eval_accuracy": 0.8920073215375229,
+ "eval_auc": 0.9139370494570753,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6551948051948052,
+ "eval_f1_macro": 0.7955863102414826,
+ "eval_loss": 0.28430166840553284,
+ "eval_pr_auc": 0.6292546024902547,
+ "eval_precision": 0.6596927100359594,
+ "eval_precision_macro": 0.7972435493102309,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7939610311131058,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.459,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1368
+ },
+ {
+ "epoch": 77.0,
+ "eval_accuracy": 0.8921598535692495,
+ "eval_auc": 0.9146080371326758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6542787286063569,
+ "eval_f1_macro": 0.7951975553215214,
+ "eval_loss": 0.283497154712677,
+ "eval_pr_auc": 0.6315022943889131,
+ "eval_precision": 0.6615029663810151,
+ "eval_precision_macro": 0.7978670296615908,
+ "eval_pred_class_0": 16634,
+ "eval_pred_class_1": 3034,
+ "eval_predicted_binding_ratio": 0.15426072808623145,
+ "eval_recall": 0.6472105772331506,
+ "eval_recall_macro": 0.7926099364103822,
+ "eval_runtime": 0.2203,
+ "eval_samples_per_second": 740.067,
+ "eval_steps_per_second": 4.54,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1386
+ },
+ {
+ "epoch": 78.0,
+ "eval_accuracy": 0.8924140736221273,
+ "eval_auc": 0.9151027497871649,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655037495924356,
+ "eval_f1_macro": 0.7956531976941219,
+ "eval_loss": 0.2829034626483917,
+ "eval_pr_auc": 0.6331226154536788,
+ "eval_precision": 0.6623804813715793,
+ "eval_precision_macro": 0.7983678781970611,
+ "eval_pred_class_0": 16635,
+ "eval_pred_class_1": 3033,
+ "eval_predicted_binding_ratio": 0.1542098840756559,
+ "eval_recall": 0.6478555304740407,
+ "eval_recall_macro": 0.7930229544686254,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.571,
+ "eval_steps_per_second": 3.899,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1404
+ },
+ {
+ "epoch": 79.0,
+ "eval_accuracy": 0.8925157616432784,
+ "eval_auc": 0.9156585533376076,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6559244791666666,
+ "eval_f1_macro": 0.7961172166862497,
+ "eval_loss": 0.28232645988464355,
+ "eval_pr_auc": 0.6343326075351273,
+ "eval_precision": 0.6621754847190273,
+ "eval_precision_macro": 0.7984260882241754,
+ "eval_pred_class_0": 16625,
+ "eval_pred_class_1": 3043,
+ "eval_predicted_binding_ratio": 0.15471832418141143,
+ "eval_recall": 0.6497903901967107,
+ "eval_recall_macro": 0.7938696624128962,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.665,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1422
+ },
+ {
+ "epoch": 80.0,
+ "eval_accuracy": 0.8929733577384584,
+ "eval_auc": 0.9162826303682348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6572219508223416,
+ "eval_f1_macro": 0.7969043930945569,
+ "eval_loss": 0.28162533044815063,
+ "eval_pr_auc": 0.6363164977912346,
+ "eval_precision": 0.6638157894736842,
+ "eval_precision_macro": 0.7993423426560147,
+ "eval_pred_class_0": 16628,
+ "eval_pred_class_1": 3040,
+ "eval_predicted_binding_ratio": 0.15456579214968477,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.79453446021916,
+ "eval_runtime": 0.272,
+ "eval_samples_per_second": 599.266,
+ "eval_steps_per_second": 3.676,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1440
+ },
+ {
+ "epoch": 81.0,
+ "eval_accuracy": 0.8928208257067317,
+ "eval_auc": 0.9167705886684476,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.657124268054652,
+ "eval_f1_macro": 0.7968036671115732,
+ "eval_loss": 0.2811121940612793,
+ "eval_pr_auc": 0.6378571407612313,
+ "eval_precision": 0.6629471611421069,
+ "eval_precision_macro": 0.7989544782306408,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6514027732989358,
+ "eval_recall_macro": 0.7947060344432748,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.342,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1458
+ },
+ {
+ "epoch": 82.0,
+ "eval_accuracy": 0.893125889770185,
+ "eval_auc": 0.9172343422437541,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6581001951854262,
+ "eval_f1_macro": 0.7973820247953165,
+ "eval_loss": 0.2804972231388092,
+ "eval_pr_auc": 0.6394259907419034,
+ "eval_precision": 0.6639317361339022,
+ "eval_precision_macro": 0.7995370130040789,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6523702031602708,
+ "eval_recall_macro": 0.7952802908117405,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.86,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1476
+ },
+ {
+ "epoch": 83.0,
+ "eval_accuracy": 0.8933801098230628,
+ "eval_auc": 0.9176662705474707,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6597436313483693,
+ "eval_f1_macro": 0.7982647858607822,
+ "eval_loss": 0.28001976013183594,
+ "eval_pr_auc": 0.6407525699560299,
+ "eval_precision": 0.6639451338994121,
+ "eval_precision_macro": 0.7998155152816343,
+ "eval_pred_class_0": 16606,
+ "eval_pred_class_1": 3062,
+ "eval_predicted_binding_ratio": 0.15568436038234695,
+ "eval_recall": 0.6555949693647211,
+ "eval_recall_macro": 0.7967417715176355,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.967,
+ "eval_steps_per_second": 3.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1494
+ },
+ {
+ "epoch": 83.33333333333333,
+ "grad_norm": 11845.0048828125,
+ "learning_rate": 9.86567120987093e-07,
+ "loss": 0.2741,
+ "step": 1500
+ },
+ {
+ "epoch": 84.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9181069872977458,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6592015579357352,
+ "eval_f1_macro": 0.7979494857864604,
+ "eval_loss": 0.2794816195964813,
+ "eval_pr_auc": 0.6421665674351047,
+ "eval_precision": 0.6635086573015354,
+ "eval_precision_macro": 0.799538997766201,
+ "eval_pred_class_0": 16607,
+ "eval_pred_class_1": 3061,
+ "eval_predicted_binding_ratio": 0.15563351637177142,
+ "eval_recall": 0.654950016123831,
+ "eval_recall_macro": 0.7963891144179245,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.121,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1512
+ },
+ {
+ "epoch": 85.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9184344761551537,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6595330739299611,
+ "eval_f1_macro": 0.7981095181516664,
+ "eval_loss": 0.27904370427131653,
+ "eval_pr_auc": 0.6429990318434126,
+ "eval_precision": 0.6631887838278449,
+ "eval_precision_macro": 0.7994577736379149,
+ "eval_pred_class_0": 16601,
+ "eval_pred_class_1": 3067,
+ "eval_predicted_binding_ratio": 0.15593858043522474,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.796782287910794,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.288,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1530
+ },
+ {
+ "epoch": 86.0,
+ "eval_accuracy": 0.8933292658124873,
+ "eval_auc": 0.9189190864757253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6585286458333334,
+ "eval_f1_macro": 0.797660321952579,
+ "eval_loss": 0.27837634086608887,
+ "eval_pr_auc": 0.6447520419594072,
+ "eval_precision": 0.664804469273743,
+ "eval_precision_macro": 0.7999811820052926,
+ "eval_pred_class_0": 16625,
+ "eval_pred_class_1": 3043,
+ "eval_predicted_binding_ratio": 0.15471832418141143,
+ "eval_recall": 0.6523702031602708,
+ "eval_recall_macro": 0.7954010127288045,
+ "eval_runtime": 0.2401,
+ "eval_samples_per_second": 678.873,
+ "eval_steps_per_second": 4.165,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1548
+ },
+ {
+ "epoch": 87.0,
+ "eval_accuracy": 0.8936851738865161,
+ "eval_auc": 0.9193183038504471,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6602761982128351,
+ "eval_f1_macro": 0.7986291029941847,
+ "eval_loss": 0.27788689732551575,
+ "eval_pr_auc": 0.6462039154555116,
+ "eval_precision": 0.6653569089718402,
+ "eval_precision_macro": 0.8005067920325675,
+ "eval_pred_class_0": 16614,
+ "eval_pred_class_1": 3054,
+ "eval_predicted_binding_ratio": 0.15527760829774254,
+ "eval_recall": 0.655272492744276,
+ "eval_recall_macro": 0.7967917965622751,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.0,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1566
+ },
+ {
+ "epoch": 88.0,
+ "eval_accuracy": 0.8941427699816962,
+ "eval_auc": 0.9197055599839506,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.662012987012987,
+ "eval_f1_macro": 0.7996283888525269,
+ "eval_loss": 0.2774609923362732,
+ "eval_pr_auc": 0.6474319229516793,
+ "eval_precision": 0.6665576985943119,
+ "eval_precision_macro": 0.8013082309577014,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.7979808256923646,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.753,
+ "eval_steps_per_second": 4.005,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1584
+ },
+ {
+ "epoch": 89.0,
+ "eval_accuracy": 0.8946003660768761,
+ "eval_auc": 0.9201319018332661,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6636378387149116,
+ "eval_f1_macro": 0.8005736295133055,
+ "eval_loss": 0.27695581316947937,
+ "eval_pr_auc": 0.6490255599400047,
+ "eval_precision": 0.6678641410842586,
+ "eval_precision_macro": 0.8021363340613392,
+ "eval_pred_class_0": 16606,
+ "eval_pred_class_1": 3062,
+ "eval_predicted_binding_ratio": 0.15568436038234695,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.7990387969914977,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 606.045,
+ "eval_steps_per_second": 3.718,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1602
+ },
+ {
+ "epoch": 90.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.920403555344157,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6648262422864566,
+ "eval_f1_macro": 0.8013081720805965,
+ "eval_loss": 0.27657878398895264,
+ "eval_pr_auc": 0.6500340451314033,
+ "eval_precision": 0.6696107294733399,
+ "eval_precision_macro": 0.8030794000144978,
+ "eval_pred_class_0": 16611,
+ "eval_pred_class_1": 3057,
+ "eval_predicted_binding_ratio": 0.1554301403294692,
+ "eval_recall": 0.6601096420509514,
+ "eval_recall_macro": 0.7995725369668047,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.642,
+ "eval_steps_per_second": 3.832,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1620
+ },
+ {
+ "epoch": 91.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9207465675374016,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6654746423927178,
+ "eval_f1_macro": 0.8017284012349317,
+ "eval_loss": 0.27611449360847473,
+ "eval_pr_auc": 0.6512249430736048,
+ "eval_precision": 0.6709275647328745,
+ "eval_precision_macro": 0.8037492731289094,
+ "eval_pred_class_0": 16617,
+ "eval_pred_class_1": 3051,
+ "eval_predicted_binding_ratio": 0.15512507626601588,
+ "eval_recall": 0.6601096420509514,
+ "eval_recall_macro": 0.7997536198424009,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.736,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1638
+ },
+ {
+ "epoch": 92.0,
+ "eval_accuracy": 0.8954647142566605,
+ "eval_auc": 0.9211146506479597,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6662337662337663,
+ "eval_f1_macro": 0.802130627992697,
+ "eval_loss": 0.275691956281662,
+ "eval_pr_auc": 0.6524138573777828,
+ "eval_precision": 0.6708074534161491,
+ "eval_precision_macro": 0.8038244624537546,
+ "eval_pred_class_0": 16609,
+ "eval_pred_class_1": 3059,
+ "eval_predicted_binding_ratio": 0.1555318283506203,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8004692699557154,
+ "eval_runtime": 0.2702,
+ "eval_samples_per_second": 603.318,
+ "eval_steps_per_second": 3.701,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1656
+ },
+ {
+ "epoch": 93.0,
+ "eval_accuracy": 0.8956680902989628,
+ "eval_auc": 0.9214137790034065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8024110910186859,
+ "eval_loss": 0.27531710267066956,
+ "eval_pr_auc": 0.6535015844647576,
+ "eval_precision": 0.6716857610474631,
+ "eval_precision_macro": 0.8042712197761243,
+ "eval_pred_class_0": 16613,
+ "eval_pred_class_1": 3055,
+ "eval_predicted_binding_ratio": 0.15532845230831807,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8005899918727795,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.638,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1674
+ },
+ {
+ "epoch": 94.0,
+ "eval_accuracy": 0.8956172462883872,
+ "eval_auc": 0.9217489565349906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6664500406173842,
+ "eval_f1_macro": 0.802288641055505,
+ "eval_loss": 0.27483227849006653,
+ "eval_pr_auc": 0.6546621751515824,
+ "eval_precision": 0.6715782580222659,
+ "eval_precision_macro": 0.8041892734676155,
+ "eval_pred_class_0": 16614,
+ "eval_pred_class_1": 3054,
+ "eval_predicted_binding_ratio": 0.15527760829774254,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.8004287535625569,
+ "eval_runtime": 0.2382,
+ "eval_samples_per_second": 684.396,
+ "eval_steps_per_second": 4.199,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1692
+ },
+ {
+ "epoch": 95.0,
+ "eval_accuracy": 0.8959731543624161,
+ "eval_auc": 0.9220882801111304,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6672088484059857,
+ "eval_f1_macro": 0.8027800298435859,
+ "eval_loss": 0.27437010407447815,
+ "eval_pr_auc": 0.6561534178394561,
+ "eval_precision": 0.6731211027239908,
+ "eval_precision_macro": 0.8049740042228342,
+ "eval_pred_class_0": 16621,
+ "eval_pred_class_1": 3047,
+ "eval_predicted_binding_ratio": 0.15492170022371365,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.800640016917419,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.273,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1710
+ },
+ {
+ "epoch": 96.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9223952917907324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8024646720298894,
+ "eval_loss": 0.27396437525749207,
+ "eval_pr_auc": 0.6571939169041311,
+ "eval_precision": 0.6726854891661195,
+ "eval_precision_macro": 0.8046979364973901,
+ "eval_pred_class_0": 16622,
+ "eval_pred_class_1": 3046,
+ "eval_predicted_binding_ratio": 0.1548708562131381,
+ "eval_recall": 0.6607545952918413,
+ "eval_recall_macro": 0.8002873598177079,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.423,
+ "eval_steps_per_second": 3.806,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1728
+ },
+ {
+ "epoch": 97.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9226829844599047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6679655788277318,
+ "eval_f1_macro": 0.8031632457541016,
+ "eval_loss": 0.2735843360424042,
+ "eval_pr_auc": 0.6581443956834908,
+ "eval_precision": 0.6726618705035972,
+ "eval_precision_macro": 0.8049040839573975,
+ "eval_pred_class_0": 16610,
+ "eval_pred_class_1": 3058,
+ "eval_predicted_binding_ratio": 0.15548098434004473,
+ "eval_recall": 0.6633344082554015,
+ "eval_recall_macro": 0.801456544382424,
+ "eval_runtime": 0.2325,
+ "eval_samples_per_second": 701.163,
+ "eval_steps_per_second": 4.302,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1746
+ },
+ {
+ "epoch": 98.0,
+ "eval_accuracy": 0.8961765304047183,
+ "eval_auc": 0.9230679047936586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.668075422626788,
+ "eval_f1_macro": 0.8032698713905396,
+ "eval_loss": 0.2730555534362793,
+ "eval_pr_auc": 0.6597232694357793,
+ "eval_precision": 0.6735496558505408,
+ "eval_precision_macro": 0.8053010360254088,
+ "eval_pred_class_0": 16617,
+ "eval_pred_class_1": 3051,
+ "eval_predicted_binding_ratio": 0.15512507626601588,
+ "eval_recall": 0.6626894550145115,
+ "eval_recall_macro": 0.801284970158309,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.817,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1764
+ },
+ {
+ "epoch": 99.0,
+ "eval_accuracy": 0.8961256863941428,
+ "eval_auc": 0.9233850285396773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6689353427321342,
+ "eval_f1_macro": 0.8036671286252258,
+ "eval_loss": 0.272703617811203,
+ "eval_pr_auc": 0.6608360801532023,
+ "eval_precision": 0.6723127035830619,
+ "eval_precision_macro": 0.8049176483332829,
+ "eval_pred_class_0": 16598,
+ "eval_pred_class_1": 3070,
+ "eval_predicted_binding_ratio": 0.1560911124669514,
+ "eval_recall": 0.6655917445985166,
+ "eval_recall_macro": 0.8024343101576514,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.703,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1782
+ },
+ {
+ "epoch": 100.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9236704438040937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6696891191709845,
+ "eval_f1_macro": 0.804084607836397,
+ "eval_loss": 0.27227067947387695,
+ "eval_pr_auc": 0.661859432748859,
+ "eval_precision": 0.672520325203252,
+ "eval_precision_macro": 0.8051325786806955,
+ "eval_pred_class_0": 16593,
+ "eval_pred_class_1": 3075,
+ "eval_predicted_binding_ratio": 0.15634533251982916,
+ "eval_recall": 0.6668816510802967,
+ "eval_recall_macro": 0.8030490829192756,
+ "eval_runtime": 0.2555,
+ "eval_samples_per_second": 638.071,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1800
+ },
+ {
+ "epoch": 101.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9239191675474416,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6695821185617103,
+ "eval_f1_macro": 0.8040329626642457,
+ "eval_loss": 0.2719270884990692,
+ "eval_pr_auc": 0.6626733311213273,
+ "eval_precision": 0.6726326065733811,
+ "eval_precision_macro": 0.8051623412499325,
+ "eval_pred_class_0": 16595,
+ "eval_pred_class_1": 3073,
+ "eval_predicted_binding_ratio": 0.15624364449867806,
+ "eval_recall": 0.6665591744598517,
+ "eval_recall_macro": 0.802918025088319,
+ "eval_runtime": 0.2736,
+ "eval_samples_per_second": 595.794,
+ "eval_steps_per_second": 3.655,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1818
+ },
+ {
+ "epoch": 102.0,
+ "eval_accuracy": 0.896786658531625,
+ "eval_auc": 0.9242257996595844,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6714146973130463,
+ "eval_f1_macro": 0.805096334723234,
+ "eval_loss": 0.271486759185791,
+ "eval_pr_auc": 0.6637007390734015,
+ "eval_precision": 0.6740331491712708,
+ "eval_precision_macro": 0.8060660592459934,
+ "eval_pred_class_0": 16591,
+ "eval_pred_class_1": 3077,
+ "eval_predicted_binding_ratio": 0.15644702054098028,
+ "eval_recall": 0.6688165108029668,
+ "eval_recall_macro": 0.8041372346976746,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.758,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1836
+ },
+ {
+ "epoch": 103.0,
+ "eval_accuracy": 0.8973459426479561,
+ "eval_auc": 0.9244978230054357,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.673248098397799,
+ "eval_f1_macro": 0.8061779895433214,
+ "eval_loss": 0.2711206376552582,
+ "eval_pr_auc": 0.6646142778873767,
+ "eval_precision": 0.6757634827810266,
+ "eval_precision_macro": 0.8071101922645338,
+ "eval_pred_class_0": 16590,
+ "eval_pred_class_1": 3078,
+ "eval_predicted_binding_ratio": 0.15649786455155582,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.8052555669553397,
+ "eval_runtime": 0.2242,
+ "eval_samples_per_second": 727.107,
+ "eval_steps_per_second": 4.461,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1854
+ },
+ {
+ "epoch": 104.0,
+ "eval_accuracy": 0.8971934106162294,
+ "eval_auc": 0.9248346842593396,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6732385261797027,
+ "eval_f1_macro": 0.8061196854381076,
+ "eval_loss": 0.2707342207431793,
+ "eval_pr_auc": 0.6659244139495173,
+ "eval_precision": 0.6747651441528992,
+ "eval_precision_macro": 0.8066847854532062,
+ "eval_pred_class_0": 16581,
+ "eval_pred_class_1": 3087,
+ "eval_predicted_binding_ratio": 0.15695546064673582,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8055581990104113,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.197,
+ "eval_steps_per_second": 3.921,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1872
+ },
+ {
+ "epoch": 105.0,
+ "eval_accuracy": 0.8975493186902583,
+ "eval_auc": 0.9250955152313902,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6746326497658647,
+ "eval_f1_macro": 0.8069177490147248,
+ "eval_loss": 0.2704195976257324,
+ "eval_pr_auc": 0.666907608407933,
+ "eval_precision": 0.675614489003881,
+ "eval_precision_macro": 0.8072811827258788,
+ "eval_pred_class_0": 16576,
+ "eval_pred_class_1": 3092,
+ "eval_predicted_binding_ratio": 0.15720968069961358,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8065558093510122,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.977,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1890
+ },
+ {
+ "epoch": 106.0,
+ "eval_accuracy": 0.8977018507219849,
+ "eval_auc": 0.9254069649305167,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6749596122778675,
+ "eval_f1_macro": 0.8071292359343842,
+ "eval_loss": 0.26996490359306335,
+ "eval_pr_auc": 0.6681450085536085,
+ "eval_precision": 0.6762706377468436,
+ "eval_precision_macro": 0.8076147808433838,
+ "eval_pred_class_0": 16579,
+ "eval_pred_class_1": 3089,
+ "eval_predicted_binding_ratio": 0.15705714866788692,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8066463507888102,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.086,
+ "eval_steps_per_second": 3.749,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1908
+ },
+ {
+ "epoch": 107.0,
+ "eval_accuracy": 0.8980577587960138,
+ "eval_auc": 0.9256414889578861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6758286176232822,
+ "eval_f1_macro": 0.8076738937412058,
+ "eval_loss": 0.2696084678173065,
+ "eval_pr_auc": 0.6691135839215693,
+ "eval_precision": 0.6776913099870299,
+ "eval_precision_macro": 0.8083644683075526,
+ "eval_pred_class_0": 16584,
+ "eval_pred_class_1": 3084,
+ "eval_predicted_binding_ratio": 0.15680292861500916,
+ "eval_recall": 0.673976136730087,
+ "eval_recall_macro": 0.8069886719746289,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.158,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1926
+ },
+ {
+ "epoch": 108.0,
+ "eval_accuracy": 0.8980069147854383,
+ "eval_auc": 0.9258192627838369,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6769726247987118,
+ "eval_f1_macro": 0.8082079811791663,
+ "eval_loss": 0.26944610476493835,
+ "eval_pr_auc": 0.6696389857739906,
+ "eval_precision": 0.676101640398842,
+ "eval_precision_macro": 0.8078859551713395,
+ "eval_pred_class_0": 16559,
+ "eval_pred_class_1": 3109,
+ "eval_predicted_binding_ratio": 0.158074028879398,
+ "eval_recall": 0.6778458561754273,
+ "eval_recall_macro": 0.8085311854668409,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.512,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1944
+ },
+ {
+ "epoch": 109.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9261110139050743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.676779086654833,
+ "eval_f1_macro": 0.8081683537924276,
+ "eval_loss": 0.26896923780441284,
+ "eval_pr_auc": 0.6708410126864026,
+ "eval_precision": 0.6773255813953488,
+ "eval_precision_macro": 0.8083707318031536,
+ "eval_pred_class_0": 16572,
+ "eval_pred_class_1": 3096,
+ "eval_predicted_binding_ratio": 0.1574130567419158,
+ "eval_recall": 0.6762334730732021,
+ "eval_recall_macro": 0.8079664377498563,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 632.096,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1962
+ },
+ {
+ "epoch": 110.0,
+ "eval_accuracy": 0.8985153548911938,
+ "eval_auc": 0.9263511243868452,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6783757653883339,
+ "eval_f1_macro": 0.809064127789247,
+ "eval_loss": 0.26861709356307983,
+ "eval_pr_auc": 0.6718712574127733,
+ "eval_precision": 0.677938808373591,
+ "eval_precision_macro": 0.808902387342021,
+ "eval_pred_class_0": 16563,
+ "eval_pred_class_1": 3105,
+ "eval_predicted_binding_ratio": 0.15787065283709578,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.8092261637523704,
+ "eval_runtime": 0.2543,
+ "eval_samples_per_second": 640.936,
+ "eval_steps_per_second": 3.932,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1980
+ },
+ {
+ "epoch": 111.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9265832055569767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6794644297467334,
+ "eval_f1_macro": 0.8097506232989936,
+ "eval_loss": 0.2682516574859619,
+ "eval_pr_auc": 0.6727910026400046,
+ "eval_precision": 0.6797934151065204,
+ "eval_precision_macro": 0.8098725675411902,
+ "eval_pred_class_0": 16570,
+ "eval_pred_class_1": 3098,
+ "eval_predicted_binding_ratio": 0.1575147447630669,
+ "eval_recall": 0.6791357626572073,
+ "eval_recall_macro": 0.809628845896721,
+ "eval_runtime": 0.2091,
+ "eval_samples_per_second": 779.644,
+ "eval_steps_per_second": 4.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1998
+ },
+ {
+ "epoch": 111.11111111111111,
+ "grad_norm": 13330.4609375,
+ "learning_rate": 9.552616846852138e-07,
+ "loss": 0.252,
+ "step": 2000
+ },
+ {
+ "epoch": 112.0,
+ "eval_accuracy": 0.899176327028676,
+ "eval_auc": 0.9269119888367457,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6793856103476152,
+ "eval_f1_macro": 0.8097842051315767,
+ "eval_loss": 0.2677896022796631,
+ "eval_pr_auc": 0.6743175064299574,
+ "eval_precision": 0.6812581063553826,
+ "eval_precision_macro": 0.8104795114507255,
+ "eval_pred_class_0": 16584,
+ "eval_pred_class_1": 3084,
+ "eval_predicted_binding_ratio": 0.15680292861500916,
+ "eval_recall": 0.6775233795549823,
+ "eval_recall_macro": 0.8090942786590025,
+ "eval_runtime": 0.2454,
+ "eval_samples_per_second": 664.193,
+ "eval_steps_per_second": 4.075,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2016
+ },
+ {
+ "epoch": 113.0,
+ "eval_accuracy": 0.8994813910921293,
+ "eval_auc": 0.927058521341044,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6812832500403031,
+ "eval_f1_macro": 0.8108073208521016,
+ "eval_loss": 0.26760444045066833,
+ "eval_pr_auc": 0.6746134464200654,
+ "eval_precision": 0.6811734364925854,
+ "eval_precision_macro": 0.8107666047608406,
+ "eval_pred_class_0": 16566,
+ "eval_pred_class_1": 3102,
+ "eval_predicted_binding_ratio": 0.15771812080536912,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.8108480555060766,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.901,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2034
+ },
+ {
+ "epoch": 114.0,
+ "eval_accuracy": 0.8993797030709783,
+ "eval_auc": 0.9272620862892311,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6811664250040277,
+ "eval_f1_macro": 0.81071512110173,
+ "eval_loss": 0.267299622297287,
+ "eval_pr_auc": 0.6753372489001316,
+ "eval_precision": 0.6806181584030908,
+ "eval_precision_macro": 0.8105119532505733,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8109187523785011,
+ "eval_runtime": 0.2924,
+ "eval_samples_per_second": 557.453,
+ "eval_steps_per_second": 3.42,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2052
+ },
+ {
+ "epoch": 115.0,
+ "eval_accuracy": 0.8994813910921293,
+ "eval_auc": 0.9274648142425077,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6817962337035248,
+ "eval_f1_macro": 0.8110548055574955,
+ "eval_loss": 0.2670327126979828,
+ "eval_pr_auc": 0.6759104665767571,
+ "eval_precision": 0.6805912596401028,
+ "eval_precision_macro": 0.8106085073266955,
+ "eval_pred_class_0": 16556,
+ "eval_pred_class_1": 3112,
+ "eval_predicted_binding_ratio": 0.15822656091112466,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.811503344660859,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.841,
+ "eval_steps_per_second": 3.999,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2070
+ },
+ {
+ "epoch": 116.0,
+ "eval_accuracy": 0.8995830791132805,
+ "eval_auc": 0.927707601161492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6824248271426274,
+ "eval_f1_macro": 0.8113938913621764,
+ "eval_loss": 0.2667410373687744,
+ "eval_pr_auc": 0.676645416517246,
+ "eval_precision": 0.6805644644002565,
+ "eval_precision_macro": 0.8107051929252038,
+ "eval_pred_class_0": 16550,
+ "eval_pred_class_1": 3118,
+ "eval_predicted_binding_ratio": 0.15853162497457798,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.812087936943217,
+ "eval_runtime": 0.2676,
+ "eval_samples_per_second": 609.094,
+ "eval_steps_per_second": 3.737,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2088
+ },
+ {
+ "epoch": 117.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.927944061956154,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6828796907714608,
+ "eval_f1_macro": 0.8117208850210732,
+ "eval_loss": 0.26635268330574036,
+ "eval_pr_auc": 0.6777081363329963,
+ "eval_precision": 0.6821106821106822,
+ "eval_precision_macro": 0.8114357758379498,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8120069041569002,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.137,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2106
+ },
+ {
+ "epoch": 118.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9282739839383012,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6831315577078289,
+ "eval_f1_macro": 0.8119498952052617,
+ "eval_loss": 0.2659379541873932,
+ "eval_pr_auc": 0.6790830976589266,
+ "eval_precision": 0.6839043309631545,
+ "eval_precision_macro": 0.8122369488772572,
+ "eval_pred_class_0": 16574,
+ "eval_pred_class_1": 3094,
+ "eval_predicted_binding_ratio": 0.1573113687207647,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8116637557086703,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.745,
+ "eval_steps_per_second": 3.857,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2124
+ },
+ {
+ "epoch": 119.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9284926050623749,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6838460299565148,
+ "eval_f1_macro": 0.812294615183528,
+ "eval_loss": 0.2656570076942444,
+ "eval_pr_auc": 0.6798372835892805,
+ "eval_precision": 0.6830759330759331,
+ "eval_precision_macro": 0.8120089810307202,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.8125811605253657,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.824,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2142
+ },
+ {
+ "epoch": 120.0,
+ "eval_accuracy": 0.9001932072401871,
+ "eval_auc": 0.9287595480437707,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6841512469831054,
+ "eval_f1_macro": 0.8124418563951485,
+ "eval_loss": 0.26531943678855896,
+ "eval_pr_auc": 0.6808986584956077,
+ "eval_precision": 0.682723185613359,
+ "eval_precision_macro": 0.8119125170545953,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.6855852950661077,
+ "eval_recall_macro": 0.8129743340182352,
+ "eval_runtime": 0.2686,
+ "eval_samples_per_second": 606.857,
+ "eval_steps_per_second": 3.723,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2160
+ },
+ {
+ "epoch": 121.0,
+ "eval_accuracy": 0.9005999593247915,
+ "eval_auc": 0.9289936341086871,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6851344822032533,
+ "eval_f1_macro": 0.8130595887334677,
+ "eval_loss": 0.26495063304901123,
+ "eval_pr_auc": 0.6818525990139518,
+ "eval_precision": 0.6843629343629344,
+ "eval_precision_macro": 0.8127732546210807,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6859077716865527,
+ "eval_recall_macro": 0.8133468356833198,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.159,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2178
+ },
+ {
+ "epoch": 122.0,
+ "eval_accuracy": 0.9010067114093959,
+ "eval_auc": 0.9291471740122346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6880307643005928,
+ "eval_f1_macro": 0.8146000626156236,
+ "eval_loss": 0.2648627460002899,
+ "eval_pr_auc": 0.6821768129155847,
+ "eval_precision": 0.6837579617834395,
+ "eval_precision_macro": 0.8130188647252145,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.692357304095453,
+ "eval_recall_macro": 0.8162094361365779,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.373,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2196
+ },
+ {
+ "epoch": 123.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9293803062922532,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.687851067244423,
+ "eval_f1_macro": 0.8145493064661928,
+ "eval_loss": 0.26444998383522034,
+ "eval_pr_auc": 0.6832369783380787,
+ "eval_precision": 0.6846645367412141,
+ "eval_precision_macro": 0.8133686693864494,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8157455657712839,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.052,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2214
+ },
+ {
+ "epoch": 124.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.929644952403895,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6885774125986789,
+ "eval_f1_macro": 0.815114870687036,
+ "eval_loss": 0.26401567459106445,
+ "eval_pr_auc": 0.6842971435384069,
+ "eval_precision": 0.6880231809401159,
+ "eval_precision_macro": 0.8149088251035563,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8153213845367371,
+ "eval_runtime": 0.2233,
+ "eval_samples_per_second": 729.933,
+ "eval_steps_per_second": 4.478,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2232
+ },
+ {
+ "epoch": 125.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.9298876614628875,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6892782510850346,
+ "eval_f1_macro": 0.8154529561328843,
+ "eval_loss": 0.26374292373657227,
+ "eval_pr_auc": 0.6852698748697685,
+ "eval_precision": 0.6871794871794872,
+ "eval_precision_macro": 0.8146738625165021,
+ "eval_pred_class_0": 16548,
+ "eval_pred_class_1": 3120,
+ "eval_predicted_binding_ratio": 0.1586333129957291,
+ "eval_recall": 0.691389874234118,
+ "eval_recall_macro": 0.8162387893534325,
+ "eval_runtime": 0.2352,
+ "eval_samples_per_second": 692.981,
+ "eval_steps_per_second": 4.251,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2250
+ },
+ {
+ "epoch": 126.0,
+ "eval_accuracy": 0.9020235916209071,
+ "eval_auc": 0.9300761410376911,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909382518043304,
+ "eval_f1_macro": 0.8163612439650636,
+ "eval_loss": 0.2635449767112732,
+ "eval_pr_auc": 0.685931037905785,
+ "eval_precision": 0.6873005743458839,
+ "eval_precision_macro": 0.815012329026093,
+ "eval_pred_class_0": 16534,
+ "eval_pred_class_1": 3134,
+ "eval_predicted_binding_ratio": 0.15934512914378687,
+ "eval_recall": 0.6946146404385682,
+ "eval_recall_macro": 0.8177304505385936,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.44,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2268
+ },
+ {
+ "epoch": 127.0,
+ "eval_accuracy": 0.902837095790116,
+ "eval_auc": 0.9303049910181687,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6925181013676589,
+ "eval_f1_macro": 0.8174102840403102,
+ "eval_loss": 0.263118714094162,
+ "eval_pr_auc": 0.6869422132706717,
+ "eval_precision": 0.691072575465639,
+ "eval_precision_macro": 0.8168725206674576,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.6939696871976782,
+ "eval_recall_macro": 0.8179512225449368,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 630.976,
+ "eval_steps_per_second": 3.871,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2286
+ },
+ {
+ "epoch": 128.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9304756990498765,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6929866795056974,
+ "eval_f1_macro": 0.8176004232749752,
+ "eval_loss": 0.2629205286502838,
+ "eval_pr_auc": 0.6875064110834537,
+ "eval_precision": 0.689776357827476,
+ "eval_precision_macro": 0.8164083143593783,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6962270235407932,
+ "eval_recall_macro": 0.8188082664031002,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.934,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2304
+ },
+ {
+ "epoch": 129.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9306633221647718,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6937530110807772,
+ "eval_f1_macro": 0.818077689508494,
+ "eval_loss": 0.2625824213027954,
+ "eval_pr_auc": 0.6881187823465719,
+ "eval_precision": 0.690978886756238,
+ "eval_precision_macro": 0.8170466915947796,
+ "eval_pred_class_0": 16542,
+ "eval_pred_class_1": 3126,
+ "eval_predicted_binding_ratio": 0.15893837705918243,
+ "eval_recall": 0.6965495001612383,
+ "eval_recall_macro": 0.8191204071096527,
+ "eval_runtime": 0.2718,
+ "eval_samples_per_second": 599.717,
+ "eval_steps_per_second": 3.679,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2322
+ },
+ {
+ "epoch": 130.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9308322783466673,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933333333333334,
+ "eval_f1_macro": 0.817839388722781,
+ "eval_loss": 0.2622954547405243,
+ "eval_pr_auc": 0.6887035254510873,
+ "eval_precision": 0.6907810499359796,
+ "eval_precision_macro": 0.816890766747487,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.6959045469203483,
+ "eval_recall_macro": 0.8187979304892078,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.881,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2340
+ },
+ {
+ "epoch": 131.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9309924460820045,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6929866795056974,
+ "eval_f1_macro": 0.8176004232749752,
+ "eval_loss": 0.26203182339668274,
+ "eval_pr_auc": 0.6893090005690568,
+ "eval_precision": 0.689776357827476,
+ "eval_precision_macro": 0.8164083143593783,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6962270235407932,
+ "eval_recall_macro": 0.8188082664031002,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.443,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2358
+ },
+ {
+ "epoch": 132.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9312000694822565,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6936286310383566,
+ "eval_f1_macro": 0.817981812876073,
+ "eval_loss": 0.26177045702934265,
+ "eval_pr_auc": 0.6902838377634022,
+ "eval_precision": 0.6904153354632588,
+ "eval_precision_macro": 0.8167882699809945,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.6968719767816833,
+ "eval_recall_macro": 0.8191911039820772,
+ "eval_runtime": 0.2756,
+ "eval_samples_per_second": 591.34,
+ "eval_steps_per_second": 3.628,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2376
+ },
+ {
+ "epoch": 133.0,
+ "eval_accuracy": 0.902938783811267,
+ "eval_auc": 0.9314264084780033,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933333333333334,
+ "eval_f1_macro": 0.817839388722781,
+ "eval_loss": 0.2614164650440216,
+ "eval_pr_auc": 0.6912123921690412,
+ "eval_precision": 0.6907810499359796,
+ "eval_precision_macro": 0.816890766747487,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.6959045469203483,
+ "eval_recall_macro": 0.8187979304892078,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.769,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2394
+ },
+ {
+ "epoch": 134.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9316330780933575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6934576434656807,
+ "eval_f1_macro": 0.8179352236654993,
+ "eval_loss": 0.26109954714775085,
+ "eval_pr_auc": 0.6919150376493911,
+ "eval_precision": 0.6913461538461538,
+ "eval_precision_macro": 0.817149992562429,
+ "eval_pred_class_0": 16548,
+ "eval_pred_class_1": 3120,
+ "eval_predicted_binding_ratio": 0.1586333129957291,
+ "eval_recall": 0.6955820702999033,
+ "eval_recall_macro": 0.8187272336167832,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.557,
+ "eval_steps_per_second": 3.844,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2412
+ },
+ {
+ "epoch": 135.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9318176062735843,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.694733472066592,
+ "eval_f1_macro": 0.8185505131193367,
+ "eval_loss": 0.2609698474407196,
+ "eval_pr_auc": 0.69244594350898,
+ "eval_precision": 0.6897647806738716,
+ "eval_precision_macro": 0.8167078351983328,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.6997742663656885,
+ "eval_recall_macro": 0.8204309854192178,
+ "eval_runtime": 0.2491,
+ "eval_samples_per_second": 654.333,
+ "eval_steps_per_second": 4.014,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2430
+ },
+ {
+ "epoch": 136.0,
+ "eval_accuracy": 0.9034472239170226,
+ "eval_auc": 0.9320372006475538,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6941536479304236,
+ "eval_f1_macro": 0.8184144035830462,
+ "eval_loss": 0.2604828178882599,
+ "eval_pr_auc": 0.6935318303087233,
+ "eval_precision": 0.6933719433719434,
+ "eval_precision_macro": 0.8181231697536046,
+ "eval_pred_class_0": 16560,
+ "eval_pred_class_1": 3108,
+ "eval_predicted_binding_ratio": 0.15802318486882244,
+ "eval_recall": 0.6949371170590132,
+ "eval_recall_macro": 0.8187065617889984,
+ "eval_runtime": 0.2485,
+ "eval_samples_per_second": 655.847,
+ "eval_steps_per_second": 4.024,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2448
+ },
+ {
+ "epoch": 137.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9321443165310757,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6960972488803583,
+ "eval_f1_macro": 0.8193338378363828,
+ "eval_loss": 0.2604370415210724,
+ "eval_pr_auc": 0.693643099537468,
+ "eval_precision": 0.6905744208187877,
+ "eval_precision_macro": 0.8172857573610195,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.8214285957598189,
+ "eval_runtime": 0.2215,
+ "eval_samples_per_second": 736.024,
+ "eval_steps_per_second": 4.515,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2466
+ },
+ {
+ "epoch": 138.0,
+ "eval_accuracy": 0.9034980679275981,
+ "eval_auc": 0.9323432098797633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695638229634381,
+ "eval_f1_macro": 0.8191484199531422,
+ "eval_loss": 0.2601032257080078,
+ "eval_pr_auc": 0.694473981068256,
+ "eval_precision": 0.691866028708134,
+ "eval_precision_macro": 0.817746962215919,
+ "eval_pred_class_0": 16533,
+ "eval_pred_class_1": 3135,
+ "eval_predicted_binding_ratio": 0.1593959731543624,
+ "eval_recall": 0.6994517897452435,
+ "eval_recall_macro": 0.8205715519016554,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.928,
+ "eval_steps_per_second": 3.858,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2484
+ },
+ {
+ "epoch": 138.88888888888889,
+ "grad_norm": 12954.3583984375,
+ "learning_rate": 9.068887706579789e-07,
+ "loss": 0.2385,
+ "step": 2500
+ },
+ {
+ "epoch": 139.0,
+ "eval_accuracy": 0.904311572096807,
+ "eval_auc": 0.9325454122780963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6969404186795491,
+ "eval_f1_macro": 0.8200635197304043,
+ "eval_loss": 0.25969693064689636,
+ "eval_pr_auc": 0.6954050242581626,
+ "eval_precision": 0.6960437439691219,
+ "eval_precision_macro": 0.819729100681946,
+ "eval_pred_class_0": 16559,
+ "eval_pred_class_1": 3109,
+ "eval_predicted_binding_ratio": 0.158074028879398,
+ "eval_recall": 0.6978394066430184,
+ "eval_recall_macro": 0.820399150415129,
+ "eval_runtime": 0.2657,
+ "eval_samples_per_second": 613.439,
+ "eval_steps_per_second": 3.763,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2502
+ },
+ {
+ "epoch": 140.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9326487714170208,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971977582065653,
+ "eval_f1_macro": 0.8200261554019741,
+ "eval_loss": 0.2596379518508911,
+ "eval_pr_auc": 0.6956168134976223,
+ "eval_precision": 0.6924300254452926,
+ "eval_precision_macro": 0.8182556808417458,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8218312779041694,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 626.028,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2520
+ },
+ {
+ "epoch": 141.0,
+ "eval_accuracy": 0.904311572096807,
+ "eval_auc": 0.9329799294265357,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6962556488056811,
+ "eval_f1_macro": 0.819733135205496,
+ "eval_loss": 0.25904589891433716,
+ "eval_pr_auc": 0.6970902220334548,
+ "eval_precision": 0.6969305331179322,
+ "eval_precision_macro": 0.8199852086334245,
+ "eval_pred_class_0": 16573,
+ "eval_pred_class_1": 3095,
+ "eval_predicted_binding_ratio": 0.15736221273134024,
+ "eval_recall": 0.6955820702999033,
+ "eval_recall_macro": 0.8194817455984336,
+ "eval_runtime": 0.2524,
+ "eval_samples_per_second": 645.771,
+ "eval_steps_per_second": 3.962,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2538
+ },
+ {
+ "epoch": 142.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9330630060376336,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994061948322902,
+ "eval_f1_macro": 0.8214143192859533,
+ "eval_loss": 0.2590080201625824,
+ "eval_pr_auc": 0.697179333543334,
+ "eval_precision": 0.6961661341853035,
+ "eval_precision_macro": 0.8202078705755396,
+ "eval_pred_class_0": 16538,
+ "eval_pred_class_1": 3130,
+ "eval_predicted_binding_ratio": 0.15914175310148465,
+ "eval_recall": 0.7026765559496937,
+ "eval_recall_macro": 0.8226366421928706,
+ "eval_runtime": 0.2495,
+ "eval_samples_per_second": 653.345,
+ "eval_steps_per_second": 4.008,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2556
+ },
+ {
+ "epoch": 143.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9332962551076398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991164658634538,
+ "eval_f1_macro": 0.8212745809731632,
+ "eval_loss": 0.25865858793258667,
+ "eval_pr_auc": 0.6981950669404262,
+ "eval_precision": 0.6965428937259923,
+ "eval_precision_macro": 0.8203156925109651,
+ "eval_pred_class_0": 16544,
+ "eval_pred_class_1": 3124,
+ "eval_predicted_binding_ratio": 0.15883668903803133,
+ "eval_recall": 0.7017091260883586,
+ "eval_recall_macro": 0.8222434687000011,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 619.149,
+ "eval_steps_per_second": 3.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2574
+ },
+ {
+ "epoch": 144.0,
+ "eval_accuracy": 0.9050233882448647,
+ "eval_auc": 0.9334749729859892,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995818591186876,
+ "eval_f1_macro": 0.8215887434369935,
+ "eval_loss": 0.2584296464920044,
+ "eval_pr_auc": 0.6986932637956595,
+ "eval_precision": 0.697786333012512,
+ "eval_precision_macro": 0.8209190259709409,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8222633132653747,
+ "eval_runtime": 0.2769,
+ "eval_samples_per_second": 588.703,
+ "eval_steps_per_second": 3.612,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2592
+ },
+ {
+ "epoch": 145.0,
+ "eval_accuracy": 0.9050742322554403,
+ "eval_auc": 0.9336868786857826,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991136180499598,
+ "eval_f1_macro": 0.8213807805320277,
+ "eval_loss": 0.25803840160369873,
+ "eval_pr_auc": 0.699660351667112,
+ "eval_precision": 0.6987757731958762,
+ "eval_precision_macro": 0.8212545854629465,
+ "eval_pred_class_0": 16564,
+ "eval_pred_class_1": 3104,
+ "eval_predicted_binding_ratio": 0.15781980882652025,
+ "eval_recall": 0.6994517897452435,
+ "eval_recall_macro": 0.8215071467589017,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.163,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2610
+ },
+ {
+ "epoch": 146.0,
+ "eval_accuracy": 0.9047183241814114,
+ "eval_auc": 0.9337996783486955,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995831997435076,
+ "eval_f1_macro": 0.8214817322060338,
+ "eval_loss": 0.2579362094402313,
+ "eval_pr_auc": 0.6999214829412527,
+ "eval_precision": 0.6955690149824674,
+ "eval_precision_macro": 0.8199882459220607,
+ "eval_pred_class_0": 16531,
+ "eval_pred_class_1": 3137,
+ "eval_predicted_binding_ratio": 0.15949766117551353,
+ "eval_recall": 0.7036439858110287,
+ "eval_recall_macro": 0.8229996352064741,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.317,
+ "eval_steps_per_second": 3.941,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2628
+ },
+ {
+ "epoch": 147.0,
+ "eval_accuracy": 0.9051759202765914,
+ "eval_auc": 0.9339161666287131,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.701647736362182,
+ "eval_f1_macro": 0.8226388900943448,
+ "eval_loss": 0.2578524649143219,
+ "eval_pr_auc": 0.7001081914566338,
+ "eval_precision": 0.6961904761904761,
+ "eval_precision_macro": 0.820610070399391,
+ "eval_pred_class_0": 16518,
+ "eval_pred_class_1": 3150,
+ "eval_predicted_binding_ratio": 0.16015863331299574,
+ "eval_recall": 0.7071912286359239,
+ "eval_recall_macro": 0.8247128956603896,
+ "eval_runtime": 0.2529,
+ "eval_samples_per_second": 644.43,
+ "eval_steps_per_second": 3.954,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2646
+ },
+ {
+ "epoch": 148.0,
+ "eval_accuracy": 0.9050233882448647,
+ "eval_auc": 0.9341703697689739,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995818591186876,
+ "eval_f1_macro": 0.8215887434369935,
+ "eval_loss": 0.25731131434440613,
+ "eval_pr_auc": 0.7012902373057504,
+ "eval_precision": 0.697786333012512,
+ "eval_precision_macro": 0.8209190259709409,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8222633132653747,
+ "eval_runtime": 0.2512,
+ "eval_samples_per_second": 648.858,
+ "eval_steps_per_second": 3.981,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2664
+ },
+ {
+ "epoch": 149.0,
+ "eval_accuracy": 0.9052776082977425,
+ "eval_auc": 0.9343408636857047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003377834968635,
+ "eval_f1_macro": 0.8220430425380088,
+ "eval_loss": 0.25706911087036133,
+ "eval_pr_auc": 0.7018656885391451,
+ "eval_precision": 0.6986521181001284,
+ "eval_precision_macro": 0.8214140242506442,
+ "eval_pred_class_0": 16552,
+ "eval_pred_class_1": 3116,
+ "eval_predicted_binding_ratio": 0.1584299369534269,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8226763313236177,
+ "eval_runtime": 0.2513,
+ "eval_samples_per_second": 648.672,
+ "eval_steps_per_second": 3.98,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2682
+ },
+ {
+ "epoch": 150.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9345392996069414,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7006758931445124,
+ "eval_f1_macro": 0.822259931959612,
+ "eval_loss": 0.2567782402038574,
+ "eval_pr_auc": 0.7026441101697649,
+ "eval_precision": 0.6993254095727593,
+ "eval_precision_macro": 0.8217557280421937,
+ "eval_pred_class_0": 16555,
+ "eval_pred_class_1": 3113,
+ "eval_predicted_binding_ratio": 0.15827740492170023,
+ "eval_recall": 0.7020316027088036,
+ "eval_recall_macro": 0.8227668727614157,
+ "eval_runtime": 0.2591,
+ "eval_samples_per_second": 629.188,
+ "eval_steps_per_second": 3.86,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2700
+ },
+ {
+ "epoch": 151.0,
+ "eval_accuracy": 0.9051250762660159,
+ "eval_auc": 0.934685296823797,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003853564547207,
+ "eval_f1_macro": 0.8220121780461352,
+ "eval_loss": 0.25664329528808594,
+ "eval_pr_auc": 0.7029644506395569,
+ "eval_precision": 0.6974736168851935,
+ "eval_precision_macro": 0.8209271234175075,
+ "eval_pred_class_0": 16541,
+ "eval_pred_class_1": 3127,
+ "eval_predicted_binding_ratio": 0.158989221069758,
+ "eval_recall": 0.7033215091905837,
+ "eval_recall_macro": 0.8231100212096456,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.856,
+ "eval_steps_per_second": 4.152,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2718
+ },
+ {
+ "epoch": 152.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9349076649599691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008216529724505,
+ "eval_f1_macro": 0.8223840221758023,
+ "eval_loss": 0.25626465678215027,
+ "eval_pr_auc": 0.7040942732775656,
+ "eval_precision": 0.7002575660012879,
+ "eval_precision_macro": 0.82217322207805,
+ "eval_pred_class_0": 16562,
+ "eval_pred_class_1": 3106,
+ "eval_predicted_binding_ratio": 0.15792149684767134,
+ "eval_recall": 0.7013866494679136,
+ "eval_recall_macro": 0.8225952985373008,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.699,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2736
+ },
+ {
+ "epoch": 153.0,
+ "eval_accuracy": 0.905226764287167,
+ "eval_auc": 0.9350594919437002,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7014734144778988,
+ "eval_f1_macro": 0.8225728005545544,
+ "eval_loss": 0.2562600076198578,
+ "eval_pr_auc": 0.7046022469135804,
+ "eval_precision": 0.6967865097041044,
+ "eval_precision_macro": 0.8208289583316286,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7062237987745889,
+ "eval_recall_macro": 0.8243499026467862,
+ "eval_runtime": 0.2358,
+ "eval_samples_per_second": 691.279,
+ "eval_steps_per_second": 4.241,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2754
+ },
+ {
+ "epoch": 154.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9352728672508359,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7010773436243769,
+ "eval_f1_macro": 0.8224715159707777,
+ "eval_loss": 0.25581732392311096,
+ "eval_pr_auc": 0.7056629719926804,
+ "eval_precision": 0.6991661321359846,
+ "eval_precision_macro": 0.821758292654095,
+ "eval_pred_class_0": 16550,
+ "eval_pred_class_1": 3118,
+ "eval_predicted_binding_ratio": 0.15853162497457798,
+ "eval_recall": 0.7029990325701386,
+ "eval_recall_macro": 0.8231902267335512,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.5,
+ "eval_steps_per_second": 3.844,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2772
+ },
+ {
+ "epoch": 155.0,
+ "eval_accuracy": 0.9057860484034981,
+ "eval_auc": 0.9354356530283926,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7025204687750842,
+ "eval_f1_macro": 0.8232752161134611,
+ "eval_loss": 0.25560733675956726,
+ "eval_pr_auc": 0.7062858637533224,
+ "eval_precision": 0.6994884910485933,
+ "eval_precision_macro": 0.8221444873622652,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.7055788455336988,
+ "eval_recall_macro": 0.8244197722567993,
+ "eval_runtime": 0.2671,
+ "eval_samples_per_second": 610.27,
+ "eval_steps_per_second": 3.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2790
+ },
+ {
+ "epoch": 156.0,
+ "eval_accuracy": 0.9058877364246491,
+ "eval_auc": 0.9356189159837551,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7028415475999358,
+ "eval_f1_macro": 0.8234659606184656,
+ "eval_loss": 0.2553412616252899,
+ "eval_pr_auc": 0.7070636650718337,
+ "eval_precision": 0.6998081841432225,
+ "eval_precision_macro": 0.8223345636556499,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.7059013221541438,
+ "eval_recall_macro": 0.8246111910462879,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 642.017,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2808
+ },
+ {
+ "epoch": 157.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9357918624902231,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7031199742682535,
+ "eval_f1_macro": 0.8236899466727462,
+ "eval_loss": 0.25509998202323914,
+ "eval_pr_auc": 0.7077594222055618,
+ "eval_precision": 0.7013153673403913,
+ "eval_precision_macro": 0.8230158493399438,
+ "eval_pred_class_0": 16551,
+ "eval_pred_class_1": 3117,
+ "eval_predicted_binding_ratio": 0.15848078096400245,
+ "eval_recall": 0.7049338922928088,
+ "eval_recall_macro": 0.8243689199497484,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.309,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2826
+ },
+ {
+ "epoch": 158.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9359727409833409,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7034593724859212,
+ "eval_f1_macro": 0.8239074586532139,
+ "eval_loss": 0.2548539340496063,
+ "eval_pr_auc": 0.7084539339673516,
+ "eval_precision": 0.7019910083493899,
+ "eval_precision_macro": 0.8233586792381238,
+ "eval_pred_class_0": 16554,
+ "eval_pred_class_1": 3114,
+ "eval_predicted_binding_ratio": 0.1583282489322758,
+ "eval_recall": 0.7049338922928088,
+ "eval_recall_macro": 0.8244594613875464,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.242,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2844
+ },
+ {
+ "epoch": 159.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9360812778117108,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7043380822794941,
+ "eval_f1_macro": 0.8242594639388645,
+ "eval_loss": 0.2548294961452484,
+ "eval_pr_auc": 0.708809734771939,
+ "eval_precision": 0.6993006993006993,
+ "eval_precision_macro": 0.822383674913635,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.709448564979039,
+ "eval_recall_macro": 0.8261735491038733,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.758,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2862
+ },
+ {
+ "epoch": 160.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9362697476540152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7040308334671591,
+ "eval_f1_macro": 0.82418310527748,
+ "eval_loss": 0.25451889634132385,
+ "eval_pr_auc": 0.70979034812957,
+ "eval_precision": 0.7012156110044786,
+ "eval_precision_macro": 0.8231322886360805,
+ "eval_pred_class_0": 16542,
+ "eval_pred_class_1": 3126,
+ "eval_predicted_binding_ratio": 0.15893837705918243,
+ "eval_recall": 0.7068687520154788,
+ "eval_recall_macro": 0.8252458083732854,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.302,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2880
+ },
+ {
+ "epoch": 161.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9364340127714132,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7047863796980405,
+ "eval_f1_macro": 0.824637224883753,
+ "eval_loss": 0.25425252318382263,
+ "eval_pr_auc": 0.7104452483887299,
+ "eval_precision": 0.70208,
+ "eval_precision_macro": 0.8236265925164723,
+ "eval_pred_class_0": 16543,
+ "eval_pred_class_1": 3125,
+ "eval_predicted_binding_ratio": 0.15888753304860687,
+ "eval_recall": 0.7075137052563689,
+ "eval_recall_macro": 0.8256588264315284,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.767,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2898
+ },
+ {
+ "epoch": 162.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9366581522223957,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7045965927354548,
+ "eval_f1_macro": 0.8245456841795291,
+ "eval_loss": 0.25392982363700867,
+ "eval_pr_auc": 0.7114160919166963,
+ "eval_precision": 0.7023389939122077,
+ "eval_precision_macro": 0.8237022823552698,
+ "eval_pred_class_0": 16547,
+ "eval_pred_class_1": 3121,
+ "eval_predicted_binding_ratio": 0.15868415700630464,
+ "eval_recall": 0.7068687520154788,
+ "eval_recall_macro": 0.8253967107696154,
+ "eval_runtime": 0.2487,
+ "eval_samples_per_second": 655.473,
+ "eval_steps_per_second": 4.021,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2916
+ },
+ {
+ "epoch": 163.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.936698911928028,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7069544364508393,
+ "eval_f1_macro": 0.8257724934589374,
+ "eval_loss": 0.25399070978164673,
+ "eval_pr_auc": 0.7113652786898896,
+ "eval_precision": 0.7010145846544071,
+ "eval_precision_macro": 0.8235604593370134,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7129958078039342,
+ "eval_recall_macro": 0.8280377119541189,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.019,
+ "eval_steps_per_second": 3.969,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2934
+ },
+ {
+ "epoch": 164.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9369248810888143,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7062780269058296,
+ "eval_f1_macro": 0.8254283885284618,
+ "eval_loss": 0.25362086296081543,
+ "eval_pr_auc": 0.7123203296069245,
+ "eval_precision": 0.7015590200445434,
+ "eval_precision_macro": 0.8236690712930735,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7110609480812641,
+ "eval_recall_macro": 0.827221184489114,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.444,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2952
+ },
+ {
+ "epoch": 165.0,
+ "eval_accuracy": 0.9072096806996136,
+ "eval_auc": 0.9371562809840187,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7070155723230053,
+ "eval_f1_macro": 0.8259456391835222,
+ "eval_loss": 0.2532632350921631,
+ "eval_pr_auc": 0.7133697274093473,
+ "eval_precision": 0.7039641943734015,
+ "eval_precision_macro": 0.8248055554696512,
+ "eval_pred_class_0": 16540,
+ "eval_pred_class_1": 3128,
+ "eval_predicted_binding_ratio": 0.15904006508033353,
+ "eval_recall": 0.710093518219929,
+ "eval_recall_macro": 0.8270996353096386,
+ "eval_runtime": 0.2545,
+ "eval_samples_per_second": 640.417,
+ "eval_steps_per_second": 3.929,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2970
+ },
+ {
+ "epoch": 166.0,
+ "eval_accuracy": 0.9071588366890381,
+ "eval_auc": 0.9372476886142239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7072779737095223,
+ "eval_f1_macro": 0.8260542385315996,
+ "eval_loss": 0.2531469762325287,
+ "eval_pr_auc": 0.7136781835058345,
+ "eval_precision": 0.7032196365954734,
+ "eval_precision_macro": 0.8245394656269969,
+ "eval_pred_class_0": 16531,
+ "eval_pred_class_1": 3137,
+ "eval_predicted_binding_ratio": 0.15949766117551353,
+ "eval_recall": 0.7113834247017091,
+ "eval_recall_macro": 0.8275936861541986,
+ "eval_runtime": 0.2653,
+ "eval_samples_per_second": 614.378,
+ "eval_steps_per_second": 3.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2988
+ },
+ {
+ "epoch": 166.66666666666666,
+ "grad_norm": 14056.4111328125,
+ "learning_rate": 8.432618494003656e-07,
+ "loss": 0.2279,
+ "step": 3000
+ },
+ {
+ "epoch": 167.0,
+ "eval_accuracy": 0.9072605247101891,
+ "eval_auc": 0.9373718655684177,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7077859660365268,
+ "eval_f1_macro": 0.8263351175441593,
+ "eval_loss": 0.25298377871513367,
+ "eval_pr_auc": 0.7140728736694715,
+ "eval_precision": 0.7032792104425343,
+ "eval_precision_macro": 0.8246534613355044,
+ "eval_pred_class_0": 16527,
+ "eval_pred_class_1": 3141,
+ "eval_predicted_binding_ratio": 0.15970103721781573,
+ "eval_recall": 0.7123508545630441,
+ "eval_recall_macro": 0.8280472206056,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.943,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3006
+ },
+ {
+ "epoch": 168.0,
+ "eval_accuracy": 0.9075655887736425,
+ "eval_auc": 0.9374749229998747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7094918504314478,
+ "eval_f1_macro": 0.8272654245808608,
+ "eval_loss": 0.25285741686820984,
+ "eval_pr_auc": 0.7143683695359583,
+ "eval_precision": 0.7031992397846056,
+ "eval_precision_macro": 0.8249204363177162,
+ "eval_pred_class_0": 16511,
+ "eval_pred_class_1": 3157,
+ "eval_predicted_binding_ratio": 0.16051454138702462,
+ "eval_recall": 0.7158980973879394,
+ "eval_recall_macro": 0.8296699396217176,
+ "eval_runtime": 0.2383,
+ "eval_samples_per_second": 683.908,
+ "eval_steps_per_second": 4.196,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3024
+ },
+ {
+ "epoch": 169.0,
+ "eval_accuracy": 0.9076672767947935,
+ "eval_auc": 0.9377269168628721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7087876844130853,
+ "eval_f1_macro": 0.8269618180373584,
+ "eval_loss": 0.25237372517585754,
+ "eval_pr_auc": 0.7154735567799367,
+ "eval_precision": 0.7049441786283892,
+ "eval_precision_macro": 0.8255259815297635,
+ "eval_pred_class_0": 16533,
+ "eval_pred_class_1": 3135,
+ "eval_predicted_binding_ratio": 0.1593959731543624,
+ "eval_recall": 0.7126733311834892,
+ "eval_recall_macro": 0.8284197222706846,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.364,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3042
+ },
+ {
+ "epoch": 170.0,
+ "eval_accuracy": 0.9079214968476713,
+ "eval_auc": 0.9378333125414714,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7099151049175076,
+ "eval_f1_macro": 0.8275952704051472,
+ "eval_loss": 0.2522483766078949,
+ "eval_pr_auc": 0.7157732693940176,
+ "eval_precision": 0.7052832590706556,
+ "eval_precision_macro": 0.8258656401852128,
+ "eval_pred_class_0": 16526,
+ "eval_pred_class_1": 3142,
+ "eval_predicted_binding_ratio": 0.1597518812283913,
+ "eval_recall": 0.7146081909061593,
+ "eval_recall_macro": 0.8293569716527538,
+ "eval_runtime": 0.2212,
+ "eval_samples_per_second": 736.855,
+ "eval_steps_per_second": 4.521,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3060
+ },
+ {
+ "epoch": 171.0,
+ "eval_accuracy": 0.9079214968476713,
+ "eval_auc": 0.9379969158489405,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7098221438871976,
+ "eval_f1_macro": 0.8275504434498686,
+ "eval_loss": 0.25200438499450684,
+ "eval_pr_auc": 0.7163721445757975,
+ "eval_precision": 0.7054140127388535,
+ "eval_precision_macro": 0.8259040054013725,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7142857142857143,
+ "eval_recall_macro": 0.8292259138217972,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.089,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3078
+ },
+ {
+ "epoch": 172.0,
+ "eval_accuracy": 0.9083282489322758,
+ "eval_auc": 0.9382153715205318,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7102683593122289,
+ "eval_f1_macro": 0.8279092226905722,
+ "eval_loss": 0.25164341926574707,
+ "eval_pr_auc": 0.7173810220120935,
+ "eval_precision": 0.7078795643818065,
+ "eval_precision_macro": 0.8270148456503497,
+ "eval_pred_class_0": 16546,
+ "eval_pred_class_1": 3122,
+ "eval_predicted_binding_ratio": 0.1587350010168802,
+ "eval_recall": 0.7126733311834892,
+ "eval_recall_macro": 0.8288120685011429,
+ "eval_runtime": 0.2387,
+ "eval_samples_per_second": 682.942,
+ "eval_steps_per_second": 4.19,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3096
+ },
+ {
+ "epoch": 173.0,
+ "eval_accuracy": 0.9084807809640024,
+ "eval_auc": 0.9383315970230777,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.711168164313222,
+ "eval_f1_macro": 0.8283970352740591,
+ "eval_loss": 0.2515103816986084,
+ "eval_pr_auc": 0.7177908159595219,
+ "eval_precision": 0.7077610986905142,
+ "eval_precision_macro": 0.8271223707155178,
+ "eval_pred_class_0": 16537,
+ "eval_pred_class_1": 3131,
+ "eval_predicted_binding_ratio": 0.15919259711206019,
+ "eval_recall": 0.7146081909061593,
+ "eval_recall_macro": 0.8296889569246799,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.31,
+ "eval_steps_per_second": 3.824,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3114
+ },
+ {
+ "epoch": 174.0,
+ "eval_accuracy": 0.9088366890380313,
+ "eval_auc": 0.9384487568455233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7133493205435651,
+ "eval_f1_macro": 0.8295745121505045,
+ "eval_loss": 0.25142860412597656,
+ "eval_pr_auc": 0.7181535155231535,
+ "eval_precision": 0.7073557387444515,
+ "eval_precision_macro": 0.8273365831908039,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7194453402128346,
+ "eval_recall_macro": 0.8318660877438894,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.694,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3132
+ },
+ {
+ "epoch": 175.0,
+ "eval_accuracy": 0.9090400650803335,
+ "eval_auc": 0.9386484385266265,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7135308246597278,
+ "eval_f1_macro": 0.8297338931856857,
+ "eval_loss": 0.2510823905467987,
+ "eval_pr_auc": 0.7190368898285651,
+ "eval_precision": 0.7086513994910941,
+ "eval_precision_macro": 0.8279095777411898,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7184779103514995,
+ "eval_recall_macro": 0.8315936361680839,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 616.851,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3150
+ },
+ {
+ "epoch": 176.0,
+ "eval_accuracy": 0.9090909090909091,
+ "eval_auc": 0.9388042461024309,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7136450992953235,
+ "eval_f1_macro": 0.8298069567551197,
+ "eval_loss": 0.25084683299064636,
+ "eval_pr_auc": 0.7196986972872739,
+ "eval_precision": 0.7088768692332167,
+ "eval_precision_macro": 0.8280239111672891,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7184779103514995,
+ "eval_recall_macro": 0.83162381664735,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.711,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3168
+ },
+ {
+ "epoch": 177.0,
+ "eval_accuracy": 0.9093451291437868,
+ "eval_auc": 0.9389100189010969,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7144001281435207,
+ "eval_f1_macro": 0.8302608322100373,
+ "eval_loss": 0.25058600306510925,
+ "eval_pr_auc": 0.7201232901620662,
+ "eval_precision": 0.7097390197326544,
+ "eval_precision_macro": 0.8285170954889824,
+ "eval_pred_class_0": 16526,
+ "eval_pred_class_1": 3142,
+ "eval_predicted_binding_ratio": 0.1597518812283913,
+ "eval_recall": 0.7191228635923895,
+ "eval_recall_macro": 0.832036834705593,
+ "eval_runtime": 0.2429,
+ "eval_samples_per_second": 671.088,
+ "eval_steps_per_second": 4.117,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3186
+ },
+ {
+ "epoch": 178.0,
+ "eval_accuracy": 0.9091417531014846,
+ "eval_auc": 0.9390935349014322,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.713392141138733,
+ "eval_f1_macro": 0.8297029283682245,
+ "eval_loss": 0.2502758800983429,
+ "eval_pr_auc": 0.7210120001490467,
+ "eval_precision": 0.7096362476068921,
+ "eval_precision_macro": 0.8282970157836081,
+ "eval_pred_class_0": 16534,
+ "eval_pred_class_1": 3134,
+ "eval_predicted_binding_ratio": 0.15934512914378687,
+ "eval_recall": 0.7171880038697195,
+ "eval_recall_macro": 0.83112976580279,
+ "eval_runtime": 0.27,
+ "eval_samples_per_second": 603.763,
+ "eval_steps_per_second": 3.704,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3204
+ },
+ {
+ "epoch": 179.0,
+ "eval_accuracy": 0.9097518812283913,
+ "eval_auc": 0.939160767004228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7162270183852918,
+ "eval_f1_macro": 0.8312854205617097,
+ "eval_loss": 0.2502012550830841,
+ "eval_pr_auc": 0.7211443774602971,
+ "eval_precision": 0.7102092580849715,
+ "eval_precision_macro": 0.8290358389250096,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7223476297968398,
+ "eval_recall_macro": 0.8335888568492861,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.383,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3222
+ },
+ {
+ "epoch": 180.0,
+ "eval_accuracy": 0.9099552572706935,
+ "eval_auc": 0.9392979952395233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7171378374061651,
+ "eval_f1_macro": 0.8317964319305957,
+ "eval_loss": 0.2500424385070801,
+ "eval_pr_auc": 0.7216701546304439,
+ "eval_precision": 0.7104430379746836,
+ "eval_precision_macro": 0.8292946956289701,
+ "eval_pred_class_0": 16508,
+ "eval_pred_class_1": 3160,
+ "eval_predicted_binding_ratio": 0.16066707341875128,
+ "eval_recall": 0.7239600128990649,
+ "eval_recall_macro": 0.8343648679211326,
+ "eval_runtime": 0.2706,
+ "eval_samples_per_second": 602.263,
+ "eval_steps_per_second": 3.695,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3240
+ },
+ {
+ "epoch": 181.0,
+ "eval_accuracy": 0.910006101281269,
+ "eval_auc": 0.9394494621207929,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7173427020121367,
+ "eval_f1_macro": 0.8319131723763289,
+ "eval_loss": 0.24980410933494568,
+ "eval_pr_auc": 0.722382187322872,
+ "eval_precision": 0.7105346409364125,
+ "eval_precision_macro": 0.8293692166334694,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7242824895195098,
+ "eval_recall_macro": 0.8345261062313551,
+ "eval_runtime": 0.2696,
+ "eval_samples_per_second": 604.516,
+ "eval_steps_per_second": 3.709,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3258
+ },
+ {
+ "epoch": 182.0,
+ "eval_accuracy": 0.9102094773235713,
+ "eval_auc": 0.9395796926893379,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7179814755669115,
+ "eval_f1_macro": 0.8322930296138966,
+ "eval_loss": 0.2495713084936142,
+ "eval_pr_auc": 0.7229232947500771,
+ "eval_precision": 0.7111673521037646,
+ "eval_precision_macro": 0.8297461525769929,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.8349089438103321,
+ "eval_runtime": 0.2615,
+ "eval_samples_per_second": 623.403,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3276
+ },
+ {
+ "epoch": 183.0,
+ "eval_accuracy": 0.910362009355298,
+ "eval_auc": 0.9396445500623882,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7187749242303397,
+ "eval_f1_macro": 0.8327294647159501,
+ "eval_loss": 0.2494269460439682,
+ "eval_pr_auc": 0.7232071190390562,
+ "eval_precision": 0.7111742424242424,
+ "eval_precision_macro": 0.8298901515151516,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.726539825862625,
+ "eval_recall_macro": 0.8356547744029127,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 643.015,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3294
+ },
+ {
+ "epoch": 184.0,
+ "eval_accuracy": 0.9106670734187513,
+ "eval_auc": 0.9397990826808293,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7191945021575835,
+ "eval_f1_macro": 0.8330396163256251,
+ "eval_loss": 0.24917152523994446,
+ "eval_pr_auc": 0.7237798760580164,
+ "eval_precision": 0.7129277566539924,
+ "eval_precision_macro": 0.830694740730097,
+ "eval_pred_class_0": 16512,
+ "eval_pred_class_1": 3156,
+ "eval_predicted_binding_ratio": 0.16046369737644905,
+ "eval_recall": 0.7255723960012899,
+ "eval_recall_macro": 0.8354426837856392,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.837,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3312
+ },
+ {
+ "epoch": 185.0,
+ "eval_accuracy": 0.9110738255033557,
+ "eval_auc": 0.9399604475135382,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7199359487590072,
+ "eval_f1_macro": 0.8335408491792982,
+ "eval_loss": 0.24888525903224945,
+ "eval_pr_auc": 0.7244599264333298,
+ "eval_precision": 0.7150127226463104,
+ "eval_precision_macro": 0.8316954196625403,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.8354220119578544,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.342,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3330
+ },
+ {
+ "epoch": 186.0,
+ "eval_accuracy": 0.9109721374822046,
+ "eval_auc": 0.9401063571379032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7194359878224643,
+ "eval_f1_macro": 0.8332638467590944,
+ "eval_loss": 0.2486649453639984,
+ "eval_pr_auc": 0.7249907062273525,
+ "eval_precision": 0.714968152866242,
+ "eval_precision_macro": 0.8315886262879129,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7239600128990649,
+ "eval_recall_macro": 0.8349684775064528,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.8,
+ "eval_steps_per_second": 3.944,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3348
+ },
+ {
+ "epoch": 187.0,
+ "eval_accuracy": 0.9112263575350824,
+ "eval_auc": 0.9402670406956853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7203715566944267,
+ "eval_f1_macro": 0.8338047799185901,
+ "eval_loss": 0.24845102429389954,
+ "eval_pr_auc": 0.7255585294747666,
+ "eval_precision": 0.7155583837098314,
+ "eval_precision_macro": 0.832000069313312,
+ "eval_pred_class_0": 16525,
+ "eval_pred_class_1": 3143,
+ "eval_predicted_binding_ratio": 0.15980272523896685,
+ "eval_recall": 0.7252499193808449,
+ "eval_recall_macro": 0.8356436112266088,
+ "eval_runtime": 0.2397,
+ "eval_samples_per_second": 680.081,
+ "eval_steps_per_second": 4.172,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3366
+ },
+ {
+ "epoch": 188.0,
+ "eval_accuracy": 0.9114805775879601,
+ "eval_auc": 0.9404174175370716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7215736446505677,
+ "eval_f1_macro": 0.8344742146415792,
+ "eval_loss": 0.24825866520404816,
+ "eval_pr_auc": 0.7262464197023197,
+ "eval_precision": 0.7157360406091371,
+ "eval_precision_macro": 0.8322867657635175,
+ "eval_pred_class_0": 16516,
+ "eval_pred_class_1": 3152,
+ "eval_predicted_binding_ratio": 0.16026032133414683,
+ "eval_recall": 0.72750725572396,
+ "eval_recall_macro": 0.8367119184396343,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.496,
+ "eval_steps_per_second": 3.856,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3384
+ },
+ {
+ "epoch": 189.0,
+ "eval_accuracy": 0.9114297335773845,
+ "eval_auc": 0.9404972726910148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7223461906279885,
+ "eval_f1_macro": 0.8348286515416876,
+ "eval_loss": 0.24819281697273254,
+ "eval_pr_auc": 0.7264794032375063,
+ "eval_precision": 0.7141506460762685,
+ "eval_precision_macro": 0.8317646228259488,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8379923162699333,
+ "eval_runtime": 0.2478,
+ "eval_samples_per_second": 657.696,
+ "eval_steps_per_second": 4.035,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3402
+ },
+ {
+ "epoch": 190.0,
+ "eval_accuracy": 0.9116839536302623,
+ "eval_auc": 0.940656301723974,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.722922316158877,
+ "eval_f1_macro": 0.8351963018783921,
+ "eval_loss": 0.24791164696216583,
+ "eval_pr_auc": 0.7271914277283732,
+ "eval_precision": 0.7152777777777778,
+ "eval_precision_macro": 0.8323358585858586,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8381432186662634,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.211,
+ "eval_steps_per_second": 3.866,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3420
+ },
+ {
+ "epoch": 191.0,
+ "eval_accuracy": 0.9118873296725646,
+ "eval_auc": 0.9407912914845091,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7233838786911413,
+ "eval_f1_macro": 0.8354907358742514,
+ "eval_loss": 0.24768619239330292,
+ "eval_pr_auc": 0.727892471269696,
+ "eval_precision": 0.7161820480404552,
+ "eval_precision_macro": 0.8327941262984632,
+ "eval_pred_class_0": 16504,
+ "eval_pred_class_1": 3164,
+ "eval_predicted_binding_ratio": 0.1608704494610535,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8382639405833274,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.424,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3438
+ },
+ {
+ "epoch": 192.0,
+ "eval_accuracy": 0.9119381736831401,
+ "eval_auc": 0.9409245683252279,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7240280433397068,
+ "eval_f1_macro": 0.8358192243316804,
+ "eval_loss": 0.2475385069847107,
+ "eval_pr_auc": 0.7283869598485145,
+ "eval_precision": 0.7155905511811024,
+ "eval_precision_macro": 0.832663401462133,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7326668816510803,
+ "eval_recall_macro": 0.8390804680483324,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.682,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3456
+ },
+ {
+ "epoch": 193.0,
+ "eval_accuracy": 0.9120398617042912,
+ "eval_auc": 0.9410766288889338,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7232885476647473,
+ "eval_f1_macro": 0.8354987049773379,
+ "eval_loss": 0.24721089005470276,
+ "eval_pr_auc": 0.7290279509427341,
+ "eval_precision": 0.7175499841320215,
+ "eval_precision_macro": 0.8333466455139735,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.837699192866343,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.255,
+ "eval_steps_per_second": 3.854,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3474
+ },
+ {
+ "epoch": 194.0,
+ "eval_accuracy": 0.9122432377465934,
+ "eval_auc": 0.9411616325348253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7239283429302623,
+ "eval_f1_macro": 0.8358790547924193,
+ "eval_loss": 0.2470986247062683,
+ "eval_pr_auc": 0.7293209780794321,
+ "eval_precision": 0.7181847032688036,
+ "eval_precision_macro": 0.8337245487646312,
+ "eval_pred_class_0": 16517,
+ "eval_pred_class_1": 3151,
+ "eval_predicted_binding_ratio": 0.16020947732357127,
+ "eval_recall": 0.7297645920670751,
+ "eval_recall_macro": 0.83808203044532,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.922,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3492
+ },
+ {
+ "epoch": 194.44444444444446,
+ "grad_norm": 15854.8017578125,
+ "learning_rate": 7.667662546617938e-07,
+ "loss": 0.2185,
+ "step": 3500
+ },
+ {
+ "epoch": 195.0,
+ "eval_accuracy": 0.9121923937360179,
+ "eval_auc": 0.9412192294636534,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7248685677871595,
+ "eval_f1_macro": 0.8363143165624445,
+ "eval_loss": 0.2470363825559616,
+ "eval_pr_auc": 0.7294473908430833,
+ "eval_precision": 0.7163098236775819,
+ "eval_precision_macro": 0.8331124670170592,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7336343115124153,
+ "eval_recall_macro": 0.839624543937532,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.796,
+ "eval_steps_per_second": 3.95,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3510
+ },
+ {
+ "epoch": 196.0,
+ "eval_accuracy": 0.9123449257677445,
+ "eval_auc": 0.9413068608842632,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7252151737328658,
+ "eval_f1_macro": 0.8365353589310388,
+ "eval_loss": 0.2468923032283783,
+ "eval_pr_auc": 0.7297206006779651,
+ "eval_precision": 0.7169870784746297,
+ "eval_precision_macro": 0.8334556489675362,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7336343115124153,
+ "eval_recall_macro": 0.83971508537533,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.86,
+ "eval_steps_per_second": 3.741,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3528
+ },
+ {
+ "epoch": 197.0,
+ "eval_accuracy": 0.9124466137888957,
+ "eval_auc": 0.9415050729580239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7246562200191877,
+ "eval_f1_macro": 0.8363018721763311,
+ "eval_loss": 0.2465437948703766,
+ "eval_pr_auc": 0.7306168212570879,
+ "eval_precision": 0.7186806216301934,
+ "eval_precision_macro": 0.8340602623742853,
+ "eval_pred_class_0": 16515,
+ "eval_pred_class_1": 3153,
+ "eval_predicted_binding_ratio": 0.1603111653447224,
+ "eval_recall": 0.7307320219284101,
+ "eval_recall_macro": 0.8385959258552536,
+ "eval_runtime": 0.2512,
+ "eval_samples_per_second": 648.769,
+ "eval_steps_per_second": 3.98,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3546
+ },
+ {
+ "epoch": 198.0,
+ "eval_accuracy": 0.9127008338417735,
+ "eval_auc": 0.9416286951597772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7254996003197443,
+ "eval_f1_macro": 0.836798347664482,
+ "eval_loss": 0.24635502696037292,
+ "eval_pr_auc": 0.7311574695224861,
+ "eval_precision": 0.7194039315155358,
+ "eval_precision_macro": 0.8345112185130059,
+ "eval_pred_class_0": 16514,
+ "eval_pred_class_1": 3154,
+ "eval_predicted_binding_ratio": 0.16036200935529796,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8391400017444531,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 635.086,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3564
+ },
+ {
+ "epoch": 199.0,
+ "eval_accuracy": 0.9126499898311979,
+ "eval_auc": 0.941704083096699,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7255591054313099,
+ "eval_f1_macro": 0.8368090605158727,
+ "eval_loss": 0.24623039364814758,
+ "eval_pr_auc": 0.7315041353273113,
+ "eval_precision": 0.7188983855650523,
+ "eval_precision_macro": 0.8343113891602595,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7323444050306352,
+ "eval_recall_macro": 0.8393719369271001,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.096,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3582
+ },
+ {
+ "epoch": 200.0,
+ "eval_accuracy": 0.912751677852349,
+ "eval_auc": 0.9417882886776758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7266645428480408,
+ "eval_f1_macro": 0.8373778882187448,
+ "eval_loss": 0.24614199995994568,
+ "eval_pr_auc": 0.7317905290059212,
+ "eval_precision": 0.7179729304375196,
+ "eval_precision_macro": 0.8341244192542944,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7355691712350855,
+ "eval_recall_macro": 0.8407428761951972,
+ "eval_runtime": 0.2558,
+ "eval_samples_per_second": 637.19,
+ "eval_steps_per_second": 3.909,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3600
+ },
+ {
+ "epoch": 201.0,
+ "eval_accuracy": 0.9129042098840756,
+ "eval_auc": 0.9419125142943645,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7274463007159905,
+ "eval_f1_macro": 0.837808654578745,
+ "eval_loss": 0.2459731251001358,
+ "eval_pr_auc": 0.7322365639924645,
+ "eval_precision": 0.717964824120603,
+ "eval_precision_macro": 0.8342614705412528,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7371815543373106,
+ "eval_recall_macro": 0.8414887067877777,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.607,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3618
+ },
+ {
+ "epoch": 202.0,
+ "eval_accuracy": 0.9131075859263779,
+ "eval_auc": 0.9421228725268236,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7264286857691692,
+ "eval_f1_macro": 0.8373900508237788,
+ "eval_loss": 0.2455640733242035,
+ "eval_pr_auc": 0.7332689412482398,
+ "eval_precision": 0.721233312142403,
+ "eval_precision_macro": 0.8354381062588301,
+ "eval_pred_class_0": 16522,
+ "eval_pred_class_1": 3146,
+ "eval_predicted_binding_ratio": 0.1599552572706935,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8393814455785812,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 643.062,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3636
+ },
+ {
+ "epoch": 203.0,
+ "eval_accuracy": 0.9132092739475289,
+ "eval_auc": 0.9422253168108461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7266613290632506,
+ "eval_f1_macro": 0.8375381529725912,
+ "eval_loss": 0.24535632133483887,
+ "eval_pr_auc": 0.7337329229212076,
+ "eval_precision": 0.7216921119592875,
+ "eval_precision_macro": 0.8356705536799585,
+ "eval_pred_class_0": 16524,
+ "eval_pred_class_1": 3144,
+ "eval_predicted_binding_ratio": 0.15985356924954242,
+ "eval_recall": 0.7316994517897453,
+ "eval_recall_macro": 0.8394418065371132,
+ "eval_runtime": 0.2645,
+ "eval_samples_per_second": 616.363,
+ "eval_steps_per_second": 3.781,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3654
+ },
+ {
+ "epoch": 204.0,
+ "eval_accuracy": 0.9133109619686801,
+ "eval_auc": 0.9423872461284946,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7268066015061689,
+ "eval_f1_macro": 0.8376441226295008,
+ "eval_loss": 0.2451435625553131,
+ "eval_pr_auc": 0.7344416461934514,
+ "eval_precision": 0.7222929936305732,
+ "eval_precision_macro": 0.8359468356342605,
+ "eval_pred_class_0": 16528,
+ "eval_pred_class_1": 3140,
+ "eval_predicted_binding_ratio": 0.1596501932072402,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.8393711096646888,
+ "eval_runtime": 0.24,
+ "eval_samples_per_second": 679.026,
+ "eval_steps_per_second": 4.166,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3672
+ },
+ {
+ "epoch": 205.0,
+ "eval_accuracy": 0.9138194020744357,
+ "eval_auc": 0.9424586242758461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7293629251157592,
+ "eval_f1_macro": 0.8390563302747484,
+ "eval_loss": 0.24504327774047852,
+ "eval_pr_auc": 0.7346240551024029,
+ "eval_precision": 0.7223276407337128,
+ "eval_precision_macro": 0.8364152440915626,
+ "eval_pred_class_0": 16506,
+ "eval_pred_class_1": 3162,
+ "eval_predicted_binding_ratio": 0.16076876143990237,
+ "eval_recall": 0.7365366010964205,
+ "eval_recall_macro": 0.8417698397526527,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.36,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3690
+ },
+ {
+ "epoch": 206.0,
+ "eval_accuracy": 0.91376855806386,
+ "eval_auc": 0.9426215171108914,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.729073482428115,
+ "eval_f1_macro": 0.8388988164347613,
+ "eval_loss": 0.2448122650384903,
+ "eval_pr_auc": 0.7352376772544322,
+ "eval_precision": 0.7223805001582779,
+ "eval_precision_macro": 0.8363855980711433,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7358916478555305,
+ "eval_recall_macro": 0.8414775436114739,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.41,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3708
+ },
+ {
+ "epoch": 207.0,
+ "eval_accuracy": 0.9139210900955868,
+ "eval_auc": 0.9426929439207377,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7299409794225554,
+ "eval_f1_macro": 0.8393709493840633,
+ "eval_loss": 0.24476298689842224,
+ "eval_pr_auc": 0.7354485111055644,
+ "eval_precision": 0.7222222222222222,
+ "eval_precision_macro": 0.8364747474747475,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7378265075782006,
+ "eval_recall_macro": 0.8423544320350108,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.323,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3726
+ },
+ {
+ "epoch": 208.0,
+ "eval_accuracy": 0.9138702460850112,
+ "eval_auc": 0.9428956913390121,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.72896,
+ "eval_f1_macro": 0.8388800483588226,
+ "eval_loss": 0.24439764022827148,
+ "eval_pr_auc": 0.736435270049487,
+ "eval_precision": 0.723404255319149,
+ "eval_precision_macro": 0.8367914187788916,
+ "eval_pred_class_0": 16519,
+ "eval_pred_class_1": 3149,
+ "eval_predicted_binding_ratio": 0.16010778930242017,
+ "eval_recall": 0.7346017413737504,
+ "eval_recall_macro": 0.8410136732461799,
+ "eval_runtime": 0.2835,
+ "eval_samples_per_second": 574.921,
+ "eval_steps_per_second": 3.527,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3744
+ },
+ {
+ "epoch": 209.0,
+ "eval_accuracy": 0.9140736221273134,
+ "eval_auc": 0.9429205676063466,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.730805989168525,
+ "eval_f1_macro": 0.83984185960937,
+ "eval_loss": 0.24438706040382385,
+ "eval_pr_auc": 0.7364306271247294,
+ "eval_precision": 0.722064841045011,
+ "eval_precision_macro": 0.8365645289452815,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7397613673008707,
+ "eval_recall_macro": 0.8432313204585479,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.222,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3762
+ },
+ {
+ "epoch": 210.0,
+ "eval_accuracy": 0.9142769981696156,
+ "eval_auc": 0.9430619808161933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7309288222151292,
+ "eval_f1_macro": 0.8399730291904192,
+ "eval_loss": 0.2441486269235611,
+ "eval_pr_auc": 0.7370811402900591,
+ "eval_precision": 0.7235387045813586,
+ "eval_precision_macro": 0.8371980622222068,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7384714608190907,
+ "eval_recall_macro": 0.842827811051786,
+ "eval_runtime": 0.2675,
+ "eval_samples_per_second": 609.291,
+ "eval_steps_per_second": 3.738,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3780
+ },
+ {
+ "epoch": 211.0,
+ "eval_accuracy": 0.9146329062436445,
+ "eval_auc": 0.9431815542983806,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7325155328978812,
+ "eval_f1_macro": 0.8408637738901821,
+ "eval_loss": 0.24397221207618713,
+ "eval_pr_auc": 0.7374915342644908,
+ "eval_precision": 0.7238664987405542,
+ "eval_precision_macro": 0.8376184300639468,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7413737504030957,
+ "eval_recall_macro": 0.8442185948852564,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.425,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3798
+ },
+ {
+ "epoch": 212.0,
+ "eval_accuracy": 0.9144295302013423,
+ "eval_auc": 0.9432977798009264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7319636884854276,
+ "eval_f1_macro": 0.8405258137499286,
+ "eval_loss": 0.24377743899822235,
+ "eval_pr_auc": 0.7380125440447487,
+ "eval_precision": 0.7230962869729389,
+ "eval_precision_macro": 0.837200053735105,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7410512737826508,
+ "eval_recall_macro": 0.8439668151372359,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.044,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3816
+ },
+ {
+ "epoch": 213.0,
+ "eval_accuracy": 0.9144295302013423,
+ "eval_auc": 0.9434619086633391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7311072056239016,
+ "eval_f1_macro": 0.8401129643018078,
+ "eval_loss": 0.24348998069763184,
+ "eval_pr_auc": 0.7388402605739814,
+ "eval_precision": 0.7245091830272324,
+ "eval_precision_macro": 0.8376331499630408,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7378265075782006,
+ "eval_recall_macro": 0.8426562368276709,
+ "eval_runtime": 0.2496,
+ "eval_samples_per_second": 652.954,
+ "eval_steps_per_second": 4.006,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3834
+ },
+ {
+ "epoch": 214.0,
+ "eval_accuracy": 0.9147854382753712,
+ "eval_auc": 0.943601764673353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7310654685494223,
+ "eval_f1_macro": 0.8402185728440683,
+ "eval_loss": 0.24328412115573883,
+ "eval_pr_auc": 0.7395557790782865,
+ "eval_precision": 0.727563078888534,
+ "eval_precision_macro": 0.838897945080114,
+ "eval_pred_class_0": 16537,
+ "eval_pred_class_1": 3131,
+ "eval_predicted_binding_ratio": 0.15919259711206019,
+ "eval_recall": 0.7346017413737504,
+ "eval_recall_macro": 0.841556921872968,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.38,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3852
+ },
+ {
+ "epoch": 215.0,
+ "eval_accuracy": 0.9146837502542201,
+ "eval_auc": 0.9436924715636332,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7310897435897435,
+ "eval_f1_macro": 0.8401943762667112,
+ "eval_loss": 0.2431441992521286,
+ "eval_pr_auc": 0.7398761361047077,
+ "eval_precision": 0.7266645428480408,
+ "eval_precision_macro": 0.838527383046018,
+ "eval_pred_class_0": 16529,
+ "eval_pred_class_1": 3139,
+ "eval_predicted_binding_ratio": 0.15959934919666463,
+ "eval_recall": 0.7355691712350855,
+ "eval_recall_macro": 0.8418897344073055,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 631.075,
+ "eval_steps_per_second": 3.872,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3870
+ },
+ {
+ "epoch": 216.0,
+ "eval_accuracy": 0.9147345942647956,
+ "eval_auc": 0.9437266715649686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7332591060919358,
+ "eval_f1_macro": 0.8412581348487456,
+ "eval_loss": 0.24318096041679382,
+ "eval_pr_auc": 0.739819563727558,
+ "eval_precision": 0.7234777150031387,
+ "eval_precision_macro": 0.8375913025931845,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7433086101257659,
+ "eval_recall_macro": 0.8450653028295274,
+ "eval_runtime": 0.2695,
+ "eval_samples_per_second": 604.716,
+ "eval_steps_per_second": 3.71,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3888
+ },
+ {
+ "epoch": 217.0,
+ "eval_accuracy": 0.915039658328249,
+ "eval_auc": 0.9438657295100676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7336202773792444,
+ "eval_f1_macro": 0.8415401994826537,
+ "eval_loss": 0.24295340478420258,
+ "eval_pr_auc": 0.740473021691125,
+ "eval_precision": 0.7254098360655737,
+ "eval_precision_macro": 0.8384566154139702,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7420187036439858,
+ "eval_recall_macro": 0.8447221543812975,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.926,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3906
+ },
+ {
+ "epoch": 218.0,
+ "eval_accuracy": 0.9148362822859467,
+ "eval_auc": 0.9439684365715622,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7333227193122114,
+ "eval_f1_macro": 0.8413247993777817,
+ "eval_loss": 0.24282881617546082,
+ "eval_pr_auc": 0.7409189185674594,
+ "eval_precision": 0.7242138364779874,
+ "eval_precision_macro": 0.837907500480624,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7426636568848759,
+ "eval_recall_macro": 0.8448635481261465,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.434,
+ "eval_steps_per_second": 3.923,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3924
+ },
+ {
+ "epoch": 219.0,
+ "eval_accuracy": 0.9149888143176734,
+ "eval_auc": 0.9440560582596731,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7337579617834394,
+ "eval_f1_macro": 0.8415885646284089,
+ "eval_loss": 0.24268390238285065,
+ "eval_pr_auc": 0.7412204047828347,
+ "eval_precision": 0.724756212645486,
+ "eval_precision_macro": 0.8382104794199593,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.7429861335053208,
+ "eval_recall_macro": 0.845085147394901,
+ "eval_runtime": 0.2558,
+ "eval_samples_per_second": 637.151,
+ "eval_steps_per_second": 3.909,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3942
+ },
+ {
+ "epoch": 220.0,
+ "eval_accuracy": 0.9152430343705511,
+ "eval_auc": 0.9441587945186644,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7346808849275823,
+ "eval_f1_macro": 0.842123366857946,
+ "eval_loss": 0.24253520369529724,
+ "eval_pr_auc": 0.7415422610927452,
+ "eval_precision": 0.7253299811439347,
+ "eval_precision_macro": 0.8386142808788943,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7442760399871009,
+ "eval_recall_macro": 0.8457602811150571,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.818,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3960
+ },
+ {
+ "epoch": 221.0,
+ "eval_accuracy": 0.9153955664022778,
+ "eval_auc": 0.9442562168332251,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7354531001589825,
+ "eval_f1_macro": 0.8425495241156832,
+ "eval_loss": 0.2423904687166214,
+ "eval_pr_auc": 0.741902799087436,
+ "eval_precision": 0.7253057384760113,
+ "eval_precision_macro": 0.8387436514456639,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.745888423089326,
+ "eval_recall_macro": 0.8465061117076376,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.926,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3978
+ },
+ {
+ "epoch": 222.0,
+ "eval_accuracy": 0.9153955664022778,
+ "eval_auc": 0.944399226172901,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.734609250398724,
+ "eval_f1_macro": 0.8421428275824746,
+ "eval_loss": 0.24210123717784882,
+ "eval_pr_auc": 0.7426309034006747,
+ "eval_precision": 0.7267276743452193,
+ "eval_precision_macro": 0.8391805533372256,
+ "eval_pred_class_0": 16499,
+ "eval_pred_class_1": 3169,
+ "eval_predicted_binding_ratio": 0.16112466951393126,
+ "eval_recall": 0.7426636568848759,
+ "eval_recall_macro": 0.8451955333980726,
+ "eval_runtime": 0.2674,
+ "eval_samples_per_second": 609.607,
+ "eval_steps_per_second": 3.74,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3996
+ },
+ {
+ "epoch": 222.22222222222223,
+ "grad_norm": 16301.5107421875,
+ "learning_rate": 6.802697587657594e-07,
+ "loss": 0.211,
+ "step": 4000
+ },
+ {
+ "epoch": 223.0,
+ "eval_accuracy": 0.9156497864551556,
+ "eval_auc": 0.9445356174132858,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.735026353617633,
+ "eval_f1_macro": 0.8424338117294514,
+ "eval_loss": 0.2418563961982727,
+ "eval_pr_auc": 0.7432491183346004,
+ "eval_precision": 0.7281645569620253,
+ "eval_precision_macro": 0.8398516024451512,
+ "eval_pred_class_0": 16508,
+ "eval_pred_class_1": 3160,
+ "eval_predicted_binding_ratio": 0.16066707341875128,
+ "eval_recall": 0.7420187036439858,
+ "eval_recall_macro": 0.8450843201324897,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.886,
+ "eval_steps_per_second": 3.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4014
+ },
+ {
+ "epoch": 224.0,
+ "eval_accuracy": 0.9159040065080334,
+ "eval_auc": 0.9445885524751136,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.736624203821656,
+ "eval_f1_macro": 0.8432939508943711,
+ "eval_loss": 0.24180874228477478,
+ "eval_pr_auc": 0.7434863322076849,
+ "eval_precision": 0.7275872916011324,
+ "eval_precision_macro": 0.8398989281099847,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.745888423089326,
+ "eval_recall_macro": 0.8468079165002977,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.671,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4032
+ },
+ {
+ "epoch": 225.0,
+ "eval_accuracy": 0.9159548505186089,
+ "eval_auc": 0.9446536531606378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7374106433677522,
+ "eval_f1_macro": 0.8436909456056703,
+ "eval_loss": 0.24177636206150055,
+ "eval_pr_auc": 0.7437995583771988,
+ "eval_precision": 0.7266750156543519,
+ "eval_precision_macro": 0.8396638402297497,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7484682360528861,
+ "eval_recall_macro": 0.8478865596272157,
+ "eval_runtime": 0.2167,
+ "eval_samples_per_second": 752.113,
+ "eval_steps_per_second": 4.614,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4050
+ },
+ {
+ "epoch": 226.0,
+ "eval_accuracy": 0.9161582265609112,
+ "eval_auc": 0.9447719419529625,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.737629276054097,
+ "eval_f1_macro": 0.843868342907385,
+ "eval_loss": 0.24154822528362274,
+ "eval_pr_auc": 0.7443388353128296,
+ "eval_precision": 0.7280150753768844,
+ "eval_precision_macro": 0.8402572343640063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7475008061915511,
+ "eval_recall_macro": 0.8476141080514102,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.202,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4068
+ },
+ {
+ "epoch": 227.0,
+ "eval_accuracy": 0.9162599145820622,
+ "eval_auc": 0.9448749507219245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7381974248927039,
+ "eval_f1_macro": 0.8441781495775367,
+ "eval_loss": 0.2414349913597107,
+ "eval_pr_auc": 0.7447448668341484,
+ "eval_precision": 0.7278996865203762,
+ "eval_precision_macro": 0.840312265884293,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7487907126733312,
+ "eval_recall_macro": 0.8481987003337683,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.465,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4086
+ },
+ {
+ "epoch": 228.0,
+ "eval_accuracy": 0.9163107585926378,
+ "eval_auc": 0.9449481196490842,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.738314785373609,
+ "eval_f1_macro": 0.8442527143596241,
+ "eval_loss": 0.2413274347782135,
+ "eval_pr_auc": 0.7451543649133733,
+ "eval_precision": 0.7281279397930386,
+ "eval_precision_macro": 0.840427826926679,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7487907126733312,
+ "eval_recall_macro": 0.8482288808130343,
+ "eval_runtime": 0.2185,
+ "eval_samples_per_second": 746.149,
+ "eval_steps_per_second": 4.578,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4104
+ },
+ {
+ "epoch": 229.0,
+ "eval_accuracy": 0.9162599145820622,
+ "eval_auc": 0.9450982726429946,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7376135096383623,
+ "eval_f1_macro": 0.843896745442007,
+ "eval_loss": 0.2410273402929306,
+ "eval_pr_auc": 0.7459954701052622,
+ "eval_precision": 0.728904282115869,
+ "eval_precision_macro": 0.8406224054285385,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7465333763302161,
+ "eval_recall_macro": 0.8472812955170728,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.64,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4122
+ },
+ {
+ "epoch": 230.0,
+ "eval_accuracy": 0.9167683546878178,
+ "eval_auc": 0.9451833444163787,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7392066273697626,
+ "eval_f1_macro": 0.8448445490519523,
+ "eval_loss": 0.2408701479434967,
+ "eval_pr_auc": 0.74641099889946,
+ "eval_precision": 0.7304785894206549,
+ "eval_precision_macro": 0.8415611477299734,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7481457594324411,
+ "eval_recall_macro": 0.8482383894645154,
+ "eval_runtime": 0.2678,
+ "eval_samples_per_second": 608.615,
+ "eval_steps_per_second": 3.734,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4140
+ },
+ {
+ "epoch": 231.0,
+ "eval_accuracy": 0.9167175106772423,
+ "eval_auc": 0.9452793360535927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7393380012730745,
+ "eval_f1_macro": 0.8448898647294817,
+ "eval_loss": 0.24077175557613373,
+ "eval_pr_auc": 0.7467905435777061,
+ "eval_precision": 0.7298146402764687,
+ "eval_precision_macro": 0.8413101105537636,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7491131892937762,
+ "eval_recall_macro": 0.8486013824781189,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.884,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4158
+ },
+ {
+ "epoch": 232.0,
+ "eval_accuracy": 0.9163616026032133,
+ "eval_auc": 0.9453335850027798,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7391786903440621,
+ "eval_f1_macro": 0.8446869866386211,
+ "eval_loss": 0.24075280129909515,
+ "eval_pr_auc": 0.7469096614042753,
+ "eval_precision": 0.7270742358078602,
+ "eval_precision_macro": 0.840149923152381,
+ "eval_pred_class_0": 16462,
+ "eval_pred_class_1": 3206,
+ "eval_predicted_binding_ratio": 0.16300589790522677,
+ "eval_recall": 0.7516930022573364,
+ "eval_recall_macro": 0.8494385817709088,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.647,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4176
+ },
+ {
+ "epoch": 233.0,
+ "eval_accuracy": 0.9168191986983933,
+ "eval_auc": 0.9454244865430391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7405645417063115,
+ "eval_f1_macro": 0.845516906033295,
+ "eval_loss": 0.24061860144138336,
+ "eval_pr_auc": 0.7472467341999135,
+ "eval_precision": 0.7285491419656787,
+ "eval_precision_macro": 0.8410102813636934,
+ "eval_pred_class_0": 16463,
+ "eval_pred_class_1": 3205,
+ "eval_predicted_binding_ratio": 0.1629550538946512,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.8502344374081289,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.77,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4194
+ },
+ {
+ "epoch": 234.0,
+ "eval_accuracy": 0.9171242627618467,
+ "eval_auc": 0.9455514956544295,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7409408773045136,
+ "eval_f1_macro": 0.8458063544009555,
+ "eval_loss": 0.24034352600574493,
+ "eval_pr_auc": 0.7478642707879094,
+ "eval_precision": 0.7304920087746788,
+ "eval_precision_macro": 0.841880100399963,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7516930022573364,
+ "eval_recall_macro": 0.849891288959899,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.626,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4212
+ },
+ {
+ "epoch": 235.0,
+ "eval_accuracy": 0.9170734187512711,
+ "eval_auc": 0.945608099868364,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7413984461709212,
+ "eval_f1_macro": 0.8460087995182923,
+ "eval_loss": 0.24029456079006195,
+ "eval_pr_auc": 0.7479864003180418,
+ "eval_precision": 0.7292576419213974,
+ "eval_precision_macro": 0.8414542370705274,
+ "eval_pred_class_0": 16462,
+ "eval_pred_class_1": 3206,
+ "eval_predicted_binding_ratio": 0.16300589790522677,
+ "eval_recall": 0.7539503386004515,
+ "eval_recall_macro": 0.8507785132973285,
+ "eval_runtime": 0.2166,
+ "eval_samples_per_second": 752.582,
+ "eval_steps_per_second": 4.617,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4230
+ },
+ {
+ "epoch": 236.0,
+ "eval_accuracy": 0.9172767947935733,
+ "eval_auc": 0.9457173958316524,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7416229950770208,
+ "eval_f1_macro": 0.8461890816058248,
+ "eval_loss": 0.2400863915681839,
+ "eval_pr_auc": 0.7485735786505677,
+ "eval_precision": 0.7306007509386734,
+ "eval_precision_macro": 0.8420487970332027,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.850506061721523,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.11,
+ "eval_steps_per_second": 3.768,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4248
+ },
+ {
+ "epoch": 237.0,
+ "eval_accuracy": 0.9173784828147244,
+ "eval_auc": 0.9457852508143816,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7421044278685923,
+ "eval_f1_macro": 0.8464570875531852,
+ "eval_loss": 0.24001120030879974,
+ "eval_pr_auc": 0.7487297504117033,
+ "eval_precision": 0.730625,
+ "eval_precision_macro": 0.8421463596065095,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7539503386004515,
+ "eval_recall_macro": 0.8509595961729245,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.497,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4266
+ },
+ {
+ "epoch": 238.0,
+ "eval_accuracy": 0.9176835468781778,
+ "eval_auc": 0.9458918216779619,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7428934413212641,
+ "eval_f1_macro": 0.8469453737675663,
+ "eval_loss": 0.23980914056301117,
+ "eval_pr_auc": 0.7491659673680734,
+ "eval_precision": 0.7318523153942428,
+ "eval_precision_macro": 0.8427959974251447,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7542728152208965,
+ "eval_recall_macro": 0.8512717368794771,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.674,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4284
+ },
+ {
+ "epoch": 239.0,
+ "eval_accuracy": 0.9175818588570266,
+ "eval_auc": 0.9459814190633611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7427392477384542,
+ "eval_f1_macro": 0.8468350393376697,
+ "eval_loss": 0.2397017627954483,
+ "eval_pr_auc": 0.7495363660441035,
+ "eval_precision": 0.73125,
+ "eval_precision_macro": 0.8425195834345397,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8513424337519016,
+ "eval_runtime": 0.2693,
+ "eval_samples_per_second": 605.187,
+ "eval_steps_per_second": 3.713,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4302
+ },
+ {
+ "epoch": 240.0,
+ "eval_accuracy": 0.9177852348993288,
+ "eval_auc": 0.9460787245879343,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7425569176882661,
+ "eval_f1_macro": 0.846819224235148,
+ "eval_loss": 0.2394852489233017,
+ "eval_pr_auc": 0.7500368484248636,
+ "eval_precision": 0.7333333333333333,
+ "eval_precision_macro": 0.8433466763706938,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7520154788777813,
+ "eval_recall_macro": 0.8504146930213136,
+ "eval_runtime": 0.2663,
+ "eval_samples_per_second": 612.01,
+ "eval_steps_per_second": 3.755,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4320
+ },
+ {
+ "epoch": 241.0,
+ "eval_accuracy": 0.91788692292048,
+ "eval_auc": 0.9461055278900622,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7437728066000318,
+ "eval_f1_macro": 0.8474411515820368,
+ "eval_loss": 0.23946216702461243,
+ "eval_pr_auc": 0.7500126507936957,
+ "eval_precision": 0.7320424734540912,
+ "eval_precision_macro": 0.8430344761294506,
+ "eval_pred_class_0": 16466,
+ "eval_pred_class_1": 3202,
+ "eval_predicted_binding_ratio": 0.16280252186292454,
+ "eval_recall": 0.7558851983231216,
+ "eval_recall_macro": 0.8520477479513235,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.523,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4338
+ },
+ {
+ "epoch": 242.0,
+ "eval_accuracy": 0.9178360789099044,
+ "eval_auc": 0.9462651603379567,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7429207763283487,
+ "eval_f1_macro": 0.8470125818101653,
+ "eval_loss": 0.23918889462947845,
+ "eval_pr_auc": 0.7507506323790389,
+ "eval_precision": 0.7331240188383046,
+ "eval_precision_macro": 0.8433259480225619,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7529829087391164,
+ "eval_recall_macro": 0.8508380469934491,
+ "eval_runtime": 0.2685,
+ "eval_samples_per_second": 607.073,
+ "eval_steps_per_second": 3.724,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4356
+ },
+ {
+ "epoch": 243.0,
+ "eval_accuracy": 0.9181411429733577,
+ "eval_auc": 0.9463650352422546,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7440381558028617,
+ "eval_f1_macro": 0.847659094847506,
+ "eval_loss": 0.2390899360179901,
+ "eval_pr_auc": 0.7511682669955798,
+ "eval_precision": 0.7337723424270931,
+ "eval_precision_macro": 0.8437961778887089,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8516744190238277,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.141,
+ "eval_steps_per_second": 3.749,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4374
+ },
+ {
+ "epoch": 244.0,
+ "eval_accuracy": 0.918446207036811,
+ "eval_auc": 0.9465344196541042,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7436061381074168,
+ "eval_f1_macro": 0.8475588127054617,
+ "eval_loss": 0.2388136237859726,
+ "eval_pr_auc": 0.7520628478642977,
+ "eval_precision": 0.7372424722662441,
+ "eval_precision_macro": 0.8451548762954184,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.7500806191551113,
+ "eval_recall_macro": 0.8500206922660327,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.538,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4392
+ },
+ {
+ "epoch": 245.0,
+ "eval_accuracy": 0.9182936750050844,
+ "eval_auc": 0.946561008841255,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7443931923015746,
+ "eval_f1_macro": 0.8478842115097998,
+ "eval_loss": 0.2388090342283249,
+ "eval_pr_auc": 0.7520269916576209,
+ "eval_precision": 0.7344632768361582,
+ "eval_precision_macro": 0.844145847858681,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8517649604616258,
+ "eval_runtime": 0.2356,
+ "eval_samples_per_second": 691.824,
+ "eval_steps_per_second": 4.244,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4410
+ },
+ {
+ "epoch": 246.0,
+ "eval_accuracy": 0.9185478950579622,
+ "eval_auc": 0.9466103526109676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7456335344553826,
+ "eval_f1_macro": 0.8485719582198821,
+ "eval_loss": 0.23877908289432526,
+ "eval_pr_auc": 0.7522268586665612,
+ "eval_precision": 0.7344385361276197,
+ "eval_precision_macro": 0.844360910951309,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8529643255056077,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.409,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4428
+ },
+ {
+ "epoch": 247.0,
+ "eval_accuracy": 0.9181919869839333,
+ "eval_auc": 0.9466863828928207,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.74464370734804,
+ "eval_f1_macro": 0.847968894691123,
+ "eval_loss": 0.23864901065826416,
+ "eval_pr_auc": 0.7525479720158522,
+ "eval_precision": 0.733125,
+ "eval_precision_macro": 0.84363925491863,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7565301515640116,
+ "eval_recall_macro": 0.8524909464888326,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.776,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4446
+ },
+ {
+ "epoch": 248.0,
+ "eval_accuracy": 0.9184970510473867,
+ "eval_auc": 0.9467725739035849,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7454343338097507,
+ "eval_f1_macro": 0.8484579580910492,
+ "eval_loss": 0.23847386240959167,
+ "eval_pr_auc": 0.752960490937812,
+ "eval_precision": 0.7343554443053817,
+ "eval_precision_macro": 0.8442903982090288,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7568526281844566,
+ "eval_recall_macro": 0.8528030871953852,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.673,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4464
+ },
+ {
+ "epoch": 249.0,
+ "eval_accuracy": 0.9183953630262355,
+ "eval_auc": 0.9468334214870647,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7452785272179019,
+ "eval_f1_macro": 0.8483468464756075,
+ "eval_loss": 0.238382488489151,
+ "eval_pr_auc": 0.7531576423642267,
+ "eval_precision": 0.73375,
+ "eval_precision_macro": 0.8440124787466602,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8528737840678097,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.912,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4482
+ },
+ {
+ "epoch": 250.0,
+ "grad_norm": 32703.09375,
+ "learning_rate": 5.870150616070439e-07,
+ "loss": 0.2045,
+ "step": 4500
+ },
+ {
+ "epoch": 250.0,
+ "eval_accuracy": 0.9189546471425666,
+ "eval_auc": 0.9469757203543168,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7464206172446707,
+ "eval_f1_macro": 0.849095331315225,
+ "eval_loss": 0.23810486495494843,
+ "eval_pr_auc": 0.7538503066163141,
+ "eval_precision": 0.7365777080062794,
+ "eval_precision_macro": 0.8453864697284325,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7565301515640116,
+ "eval_recall_macro": 0.8529436536778228,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.492,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4500
+ },
+ {
+ "epoch": 251.0,
+ "eval_accuracy": 0.9192088671954444,
+ "eval_auc": 0.9470911964544428,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7465305471367044,
+ "eval_f1_macro": 0.8492382980338313,
+ "eval_loss": 0.23794293403625488,
+ "eval_pr_auc": 0.7543678751573928,
+ "eval_precision": 0.7386363636363636,
+ "eval_precision_macro": 0.8462575757575758,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7545952918413416,
+ "eval_recall_macro": 0.8523082090884139,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.943,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4518
+ },
+ {
+ "epoch": 252.0,
+ "eval_accuracy": 0.9186495830791133,
+ "eval_auc": 0.9471146615094285,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7459510955859003,
+ "eval_f1_macro": 0.8487610069611806,
+ "eval_loss": 0.2380078136920929,
+ "eval_pr_auc": 0.7543445385135205,
+ "eval_precision": 0.7347513293712856,
+ "eval_precision_macro": 0.8445476639570896,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7574975814253466,
+ "eval_recall_macro": 0.8531557442950962,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.051,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4536
+ },
+ {
+ "epoch": 253.0,
+ "eval_accuracy": 0.9187004270896888,
+ "eval_auc": 0.9472153636761378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7461501825686617,
+ "eval_f1_macro": 0.8488749520465066,
+ "eval_loss": 0.23785638809204102,
+ "eval_pr_auc": 0.7549126832149435,
+ "eval_precision": 0.7348342714196373,
+ "eval_precision_macro": 0.8446181071730852,
+ "eval_pred_class_0": 16470,
+ "eval_pred_class_1": 3198,
+ "eval_predicted_binding_ratio": 0.16259914582062232,
+ "eval_recall": 0.7578200580457917,
+ "eval_recall_macro": 0.8533169826053187,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.46,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4554
+ },
+ {
+ "epoch": 254.0,
+ "eval_accuracy": 0.918903803131991,
+ "eval_auc": 0.9473291852514412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7465437788018433,
+ "eval_f1_macro": 0.849136671654349,
+ "eval_loss": 0.23767386376857758,
+ "eval_pr_auc": 0.7554825230801616,
+ "eval_precision": 0.7359022556390977,
+ "eval_precision_macro": 0.8451300547435596,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7574975814253466,
+ "eval_recall_macro": 0.8533066466914263,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.53,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4572
+ },
+ {
+ "epoch": 255.0,
+ "eval_accuracy": 0.9196156192800488,
+ "eval_auc": 0.9474168847995438,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7481280866656046,
+ "eval_f1_macro": 0.8501522492676461,
+ "eval_loss": 0.23749451339244843,
+ "eval_pr_auc": 0.7559063738482461,
+ "eval_precision": 0.739294710327456,
+ "eval_precision_macro": 0.8468181046180088,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.8535981155701939,
+ "eval_runtime": 0.267,
+ "eval_samples_per_second": 610.581,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4590
+ },
+ {
+ "epoch": 256.0,
+ "eval_accuracy": 0.9195647752694732,
+ "eval_auc": 0.9474761751831905,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7480089200382287,
+ "eval_f1_macro": 0.8500768176935048,
+ "eval_loss": 0.23738548159599304,
+ "eval_pr_auc": 0.7561572732622138,
+ "eval_precision": 0.7390620081838212,
+ "eval_precision_macro": 0.8467003692001515,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.853567935090928,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.111,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4608
+ },
+ {
+ "epoch": 257.0,
+ "eval_accuracy": 0.919818995322351,
+ "eval_auc": 0.9475989409250355,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7486051331101546,
+ "eval_f1_macro": 0.8504541559450298,
+ "eval_loss": 0.23718814551830292,
+ "eval_pr_auc": 0.7567216824275462,
+ "eval_precision": 0.7402269861286255,
+ "eval_precision_macro": 0.8472897782243516,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7571751048049017,
+ "eval_recall_macro": 0.853718837487258,
+ "eval_runtime": 0.2144,
+ "eval_samples_per_second": 760.368,
+ "eval_steps_per_second": 4.665,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4626
+ },
+ {
+ "epoch": 258.0,
+ "eval_accuracy": 0.9193613992271711,
+ "eval_auc": 0.9475927315907009,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.748013981569749,
+ "eval_f1_macro": 0.8500072329009691,
+ "eval_loss": 0.23723167181015015,
+ "eval_pr_auc": 0.7565508642499409,
+ "eval_precision": 0.7372377074851237,
+ "eval_precision_macro": 0.845948140540741,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.8542335601596028,
+ "eval_runtime": 0.2406,
+ "eval_samples_per_second": 677.35,
+ "eval_steps_per_second": 4.156,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4644
+ },
+ {
+ "epoch": 259.0,
+ "eval_accuracy": 0.9197681513117755,
+ "eval_auc": 0.9476849762158163,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7486460656259956,
+ "eval_f1_macro": 0.8504558902151395,
+ "eval_loss": 0.23700466752052307,
+ "eval_pr_auc": 0.7570817989267699,
+ "eval_precision": 0.7396915328926661,
+ "eval_precision_macro": 0.8470757706910725,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7578200580457917,
+ "eval_recall_macro": 0.8539507726699049,
+ "eval_runtime": 0.2559,
+ "eval_samples_per_second": 636.906,
+ "eval_steps_per_second": 3.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4662
+ },
+ {
+ "epoch": 260.0,
+ "eval_accuracy": 0.9199715273540777,
+ "eval_auc": 0.9477206555569931,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495225970719287,
+ "eval_f1_macro": 0.8509503339952407,
+ "eval_loss": 0.23693729937076569,
+ "eval_pr_auc": 0.7572519889982183,
+ "eval_precision": 0.7398680490103676,
+ "eval_precision_macro": 0.8473073942352414,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7594324411480168,
+ "eval_recall_macro": 0.8547267837417515,
+ "eval_runtime": 0.2431,
+ "eval_samples_per_second": 670.398,
+ "eval_steps_per_second": 4.113,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4680
+ },
+ {
+ "epoch": 261.0,
+ "eval_accuracy": 0.919818995322351,
+ "eval_auc": 0.9477750407611654,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7492447129909365,
+ "eval_f1_macro": 0.8507623994645729,
+ "eval_loss": 0.23685960471630096,
+ "eval_pr_auc": 0.7574807332819814,
+ "eval_precision": 0.739021329987453,
+ "eval_precision_macro": 0.8469075096539207,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.85476730013491,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.06,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4698
+ },
+ {
+ "epoch": 262.0,
+ "eval_accuracy": 0.9200223713646533,
+ "eval_auc": 0.947870010485989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495621716287215,
+ "eval_f1_macro": 0.8509874086097018,
+ "eval_loss": 0.23665639758110046,
+ "eval_pr_auc": 0.7579557575566394,
+ "eval_precision": 0.740251572327044,
+ "eval_precision_macro": 0.8474729477355745,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.854625906390061,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.828,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4716
+ },
+ {
+ "epoch": 263.0,
+ "eval_accuracy": 0.9201749033963799,
+ "eval_auc": 0.9479619047411422,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7501591343093571,
+ "eval_f1_macro": 0.8513291133243506,
+ "eval_loss": 0.23653987050056458,
+ "eval_pr_auc": 0.7584013256784117,
+ "eval_precision": 0.7404963870562362,
+ "eval_precision_macro": 0.8476822244653337,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7600773943889068,
+ "eval_recall_macro": 0.8551096213207285,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 641.941,
+ "eval_steps_per_second": 3.938,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4734
+ },
+ {
+ "epoch": 264.0,
+ "eval_accuracy": 0.9203274354281066,
+ "eval_auc": 0.9480515799865329,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7501195981502152,
+ "eval_f1_macro": 0.8513640482812168,
+ "eval_loss": 0.2363332211971283,
+ "eval_pr_auc": 0.7588834286830018,
+ "eval_precision": 0.7419558359621451,
+ "eval_precision_macro": 0.848278196802748,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7584650112866818,
+ "eval_recall_macro": 0.8545448736037441,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.351,
+ "eval_steps_per_second": 3.953,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4752
+ },
+ {
+ "epoch": 265.0,
+ "eval_accuracy": 0.9203782794386821,
+ "eval_auc": 0.9481172938194913,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.750557502389296,
+ "eval_f1_macro": 0.8515931077800434,
+ "eval_loss": 0.23625436425209045,
+ "eval_pr_auc": 0.7591087523185005,
+ "eval_precision": 0.7415801070192005,
+ "eval_precision_macro": 0.8482019751638359,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.8550992854068361,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.903,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4770
+ },
+ {
+ "epoch": 266.0,
+ "eval_accuracy": 0.9200223713646533,
+ "eval_auc": 0.9482093632596256,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7495621716287215,
+ "eval_f1_macro": 0.8509874086097018,
+ "eval_loss": 0.23614051938056946,
+ "eval_pr_auc": 0.7594934170637511,
+ "eval_precision": 0.740251572327044,
+ "eval_precision_macro": 0.8474729477355745,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7591099645275717,
+ "eval_recall_macro": 0.854625906390061,
+ "eval_runtime": 0.2555,
+ "eval_samples_per_second": 638.024,
+ "eval_steps_per_second": 3.914,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4788
+ },
+ {
+ "epoch": 267.0,
+ "eval_accuracy": 0.9204291234492576,
+ "eval_auc": 0.9483264257570818,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7504385265507894,
+ "eval_f1_macro": 0.8515537559413557,
+ "eval_loss": 0.23595084249973297,
+ "eval_pr_auc": 0.7600448623712702,
+ "eval_precision": 0.7422712933753943,
+ "eval_precision_macro": 0.8484662322132155,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7587874879071267,
+ "eval_recall_macro": 0.8547362923932326,
+ "eval_runtime": 0.2589,
+ "eval_samples_per_second": 629.531,
+ "eval_steps_per_second": 3.862,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4806
+ },
+ {
+ "epoch": 268.0,
+ "eval_accuracy": 0.9203782794386821,
+ "eval_auc": 0.9483670102777331,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7509541984732825,
+ "eval_f1_macro": 0.8517842887791249,
+ "eval_loss": 0.23594258725643158,
+ "eval_pr_auc": 0.7601171664174119,
+ "eval_precision": 0.740822089739567,
+ "eval_precision_macro": 0.8479609508220922,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7613673008706868,
+ "eval_recall_macro": 0.8557545745616185,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.883,
+ "eval_steps_per_second": 4.061,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4824
+ },
+ {
+ "epoch": 269.0,
+ "eval_accuracy": 0.920276591417531,
+ "eval_auc": 0.9484603741402287,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7505567928730512,
+ "eval_f1_macro": 0.8515567625484772,
+ "eval_loss": 0.23575998842716217,
+ "eval_pr_auc": 0.7605631058573062,
+ "eval_precision": 0.7406593406593407,
+ "eval_precision_macro": 0.8478216317444613,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7607223476297968,
+ "eval_recall_macro": 0.8554320979411736,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.928,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4842
+ },
+ {
+ "epoch": 270.0,
+ "eval_accuracy": 0.9206833435021354,
+ "eval_auc": 0.9485513243429828,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.751434034416826,
+ "eval_f1_macro": 0.8521235507837306,
+ "eval_loss": 0.235606849193573,
+ "eval_pr_auc": 0.7610077759721279,
+ "eval_precision": 0.7426771653543307,
+ "eval_precision_macro": 0.8488138752255192,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7603998710093518,
+ "eval_recall_macro": 0.8555424839443451,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.303,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4860
+ },
+ {
+ "epoch": 271.0,
+ "eval_accuracy": 0.9209884075655888,
+ "eval_auc": 0.948647082400222,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7519948930737312,
+ "eval_f1_macro": 0.8525018311755109,
+ "eval_loss": 0.23542079329490662,
+ "eval_pr_auc": 0.7614859215167992,
+ "eval_precision": 0.744391785150079,
+ "eval_precision_macro": 0.8496242389363071,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7597549177684618,
+ "eval_recall_macro": 0.8554614511580283,
+ "eval_runtime": 0.2456,
+ "eval_samples_per_second": 663.61,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4878
+ },
+ {
+ "epoch": 272.0,
+ "eval_accuracy": 0.920734187512711,
+ "eval_auc": 0.9487102755159504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7517120560598821,
+ "eval_f1_macro": 0.8522755458325244,
+ "eval_loss": 0.2353215366601944,
+ "eval_pr_auc": 0.761752604262035,
+ "eval_precision": 0.7426054122089364,
+ "eval_precision_macro": 0.8488345435817272,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8558347800855242,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.279,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4896
+ },
+ {
+ "epoch": 273.0,
+ "eval_accuracy": 0.920734187512711,
+ "eval_auc": 0.9487825490532058,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7517911160643209,
+ "eval_f1_macro": 0.8523136490925144,
+ "eval_loss": 0.23522616922855377,
+ "eval_pr_auc": 0.7620046741511783,
+ "eval_precision": 0.7424528301886792,
+ "eval_precision_macro": 0.8487858522607636,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.7613673008706868,
+ "eval_recall_macro": 0.8559658379164806,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.657,
+ "eval_steps_per_second": 3.875,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4914
+ },
+ {
+ "epoch": 274.0,
+ "eval_accuracy": 0.9208358755338621,
+ "eval_auc": 0.9488776355680169,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7518725099601593,
+ "eval_f1_macro": 0.8523888728682258,
+ "eval_loss": 0.2350645512342453,
+ "eval_pr_auc": 0.7623918274747735,
+ "eval_precision": 0.7432262129804663,
+ "eval_precision_macro": 0.8491200787225601,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7607223476297968,
+ "eval_recall_macro": 0.8557640832130997,
+ "eval_runtime": 0.2427,
+ "eval_samples_per_second": 671.591,
+ "eval_steps_per_second": 4.12,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4932
+ },
+ {
+ "epoch": 275.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9489531597599242,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7525510204081632,
+ "eval_f1_macro": 0.8528058755561261,
+ "eval_loss": 0.2349635362625122,
+ "eval_pr_auc": 0.7627186958629152,
+ "eval_precision": 0.7442447177546515,
+ "eval_precision_macro": 0.8496637300357182,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8560460434403863,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.549,
+ "eval_steps_per_second": 3.838,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4950
+ },
+ {
+ "epoch": 276.0,
+ "eval_accuracy": 0.9207850315232866,
+ "eval_auc": 0.9489723619803666,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7524626628535113,
+ "eval_f1_macro": 0.8526552767085183,
+ "eval_loss": 0.2350020557641983,
+ "eval_pr_auc": 0.7627735453579831,
+ "eval_precision": 0.7416222987785781,
+ "eval_precision_macro": 0.8485653223786669,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8569134232124421,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.627,
+ "eval_steps_per_second": 3.93,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4968
+ },
+ {
+ "epoch": 277.0,
+ "eval_accuracy": 0.9209884075655888,
+ "eval_auc": 0.9490740568619694,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7524689391525964,
+ "eval_f1_macro": 0.8527303253449472,
+ "eval_loss": 0.2348015159368515,
+ "eval_pr_auc": 0.7632371897507115,
+ "eval_precision": 0.7434686811457349,
+ "eval_precision_macro": 0.8493281796365992,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7616897774911319,
+ "eval_recall_macro": 0.8562477981437672,
+ "eval_runtime": 0.2476,
+ "eval_samples_per_second": 658.287,
+ "eval_steps_per_second": 4.039,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4986
+ },
+ {
+ "epoch": 277.77777777777777,
+ "grad_norm": 14799.8212890625,
+ "learning_rate": 4.904982238472025e-07,
+ "loss": 0.199,
+ "step": 5000
+ },
+ {
+ "epoch": 278.0,
+ "eval_accuracy": 0.9207850315232866,
+ "eval_auc": 0.9491519363186243,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7514358647096363,
+ "eval_f1_macro": 0.8521604145127957,
+ "eval_loss": 0.23470228910446167,
+ "eval_pr_auc": 0.7636185597213633,
+ "eval_precision": 0.7436059362172402,
+ "eval_precision_macro": 0.8491982774838095,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.7594324411480168,
+ "eval_recall_macro": 0.8552096714100077,
+ "eval_runtime": 0.2561,
+ "eval_samples_per_second": 636.482,
+ "eval_steps_per_second": 3.905,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5004
+ },
+ {
+ "epoch": 279.0,
+ "eval_accuracy": 0.9209375635550132,
+ "eval_auc": 0.9492209202712323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7521912350597609,
+ "eval_f1_macro": 0.8525784825369885,
+ "eval_loss": 0.2345963418483734,
+ "eval_pr_auc": 0.7638862490185815,
+ "eval_precision": 0.7435412728418399,
+ "eval_precision_macro": 0.8493079227068421,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7610448242502419,
+ "eval_recall_macro": 0.8559555020025882,
+ "eval_runtime": 0.273,
+ "eval_samples_per_second": 597.168,
+ "eval_steps_per_second": 3.664,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5022
+ },
+ {
+ "epoch": 280.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9492453877736104,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7532591414944356,
+ "eval_f1_macro": 0.8531471523002045,
+ "eval_loss": 0.23453067243099213,
+ "eval_pr_auc": 0.7639137465976396,
+ "eval_precision": 0.7428661022264033,
+ "eval_precision_macro": 0.8492229655497572,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7639471138342471,
+ "eval_recall_macro": 0.8572255639189947,
+ "eval_runtime": 0.2247,
+ "eval_samples_per_second": 725.266,
+ "eval_steps_per_second": 4.449,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5040
+ },
+ {
+ "epoch": 281.0,
+ "eval_accuracy": 0.9210900955867399,
+ "eval_auc": 0.9493002596027307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7531806615776081,
+ "eval_f1_macro": 0.8531093334515976,
+ "eval_loss": 0.2344331294298172,
+ "eval_pr_auc": 0.7641447446055818,
+ "eval_precision": 0.7430185127078758,
+ "eval_precision_macro": 0.8492715280607518,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8570945060880382,
+ "eval_runtime": 0.263,
+ "eval_samples_per_second": 619.693,
+ "eval_steps_per_second": 3.802,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5058
+ },
+ {
+ "epoch": 282.0,
+ "eval_accuracy": 0.9212426276184665,
+ "eval_auc": 0.9493485327975579,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7538534880025425,
+ "eval_f1_macro": 0.8534875889608693,
+ "eval_loss": 0.2344052791595459,
+ "eval_pr_auc": 0.764296468123137,
+ "eval_precision": 0.7431077694235589,
+ "eval_precision_macro": 0.849430796583593,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8577092788496623,
+ "eval_runtime": 0.2484,
+ "eval_samples_per_second": 656.097,
+ "eval_steps_per_second": 4.025,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5076
+ },
+ {
+ "epoch": 283.0,
+ "eval_accuracy": 0.9212426276184665,
+ "eval_auc": 0.9494190544850012,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7536969311496263,
+ "eval_f1_macro": 0.8534121476034391,
+ "eval_loss": 0.23425185680389404,
+ "eval_pr_auc": 0.7646652388104445,
+ "eval_precision": 0.7434127979924717,
+ "eval_precision_macro": 0.8495280009379834,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8574471631877492,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.881,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5094
+ },
+ {
+ "epoch": 284.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9495173137944721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7539000318369946,
+ "eval_f1_macro": 0.853564041452472,
+ "eval_loss": 0.2340681403875351,
+ "eval_pr_auc": 0.7651886756989377,
+ "eval_precision": 0.744419993712669,
+ "eval_precision_macro": 0.8499803613859639,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.763624637213802,
+ "eval_recall_macro": 0.8572755889636343,
+ "eval_runtime": 0.2477,
+ "eval_samples_per_second": 658.168,
+ "eval_steps_per_second": 4.038,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5112
+ },
+ {
+ "epoch": 285.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9495497619459952,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7540566337893733,
+ "eval_f1_macro": 0.8536395120535369,
+ "eval_loss": 0.234034925699234,
+ "eval_pr_auc": 0.7653794962608654,
+ "eval_precision": 0.7441130298273155,
+ "eval_precision_macro": 0.8498821534503319,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8575377046255472,
+ "eval_runtime": 0.2024,
+ "eval_samples_per_second": 805.487,
+ "eval_steps_per_second": 4.942,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5130
+ },
+ {
+ "epoch": 286.0,
+ "eval_accuracy": 0.9215476916819199,
+ "eval_auc": 0.9496228140831674,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.754181934044926,
+ "eval_f1_macro": 0.8537539029854382,
+ "eval_loss": 0.23388919234275818,
+ "eval_pr_auc": 0.7657504791267543,
+ "eval_precision": 0.7452770780856424,
+ "eval_precision_macro": 0.8503853253634615,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.763302160593357,
+ "eval_recall_macro": 0.8572350725704758,
+ "eval_runtime": 0.1796,
+ "eval_samples_per_second": 907.381,
+ "eval_steps_per_second": 5.567,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5148
+ },
+ {
+ "epoch": 287.0,
+ "eval_accuracy": 0.9219544437665244,
+ "eval_auc": 0.9497348546111616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7550662198819211,
+ "eval_f1_macro": 0.8543240621923138,
+ "eval_loss": 0.2337103933095932,
+ "eval_pr_auc": 0.7663000808208629,
+ "eval_precision": 0.7473152242577384,
+ "eval_precision_macro": 0.8513875842534602,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7629796839729119,
+ "eval_recall_macro": 0.8573454585736473,
+ "eval_runtime": 0.2659,
+ "eval_samples_per_second": 613.107,
+ "eval_steps_per_second": 3.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5166
+ },
+ {
+ "epoch": 288.0,
+ "eval_accuracy": 0.9218527557453732,
+ "eval_auc": 0.9498016974139991,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7548253309937789,
+ "eval_f1_macro": 0.8541719723587154,
+ "eval_loss": 0.23359474539756775,
+ "eval_pr_auc": 0.7666273574525592,
+ "eval_precision": 0.7468434343434344,
+ "eval_precision_macro": 0.8511489898989899,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7629796839729119,
+ "eval_recall_macro": 0.8572850976151154,
+ "eval_runtime": 0.2686,
+ "eval_samples_per_second": 606.819,
+ "eval_steps_per_second": 3.723,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5184
+ },
+ {
+ "epoch": 289.0,
+ "eval_accuracy": 0.9216493797030709,
+ "eval_auc": 0.9498063690134986,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7546569017672345,
+ "eval_f1_macro": 0.8540188154275592,
+ "eval_loss": 0.23359988629817963,
+ "eval_pr_auc": 0.7666725529217715,
+ "eval_precision": 0.7452830188679245,
+ "eval_precision_macro": 0.8504738723645784,
+ "eval_pred_class_0": 16488,
+ "eval_pred_class_1": 3180,
+ "eval_predicted_binding_ratio": 0.16168395363026236,
+ "eval_recall": 0.764269590454692,
+ "eval_recall_macro": 0.8576886070218773,
+ "eval_runtime": 0.2712,
+ "eval_samples_per_second": 600.959,
+ "eval_steps_per_second": 3.687,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5202
+ },
+ {
+ "epoch": 290.0,
+ "eval_accuracy": 0.9213951596501933,
+ "eval_auc": 0.9498248412965191,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7543692405465523,
+ "eval_f1_macro": 0.8537901526260393,
+ "eval_loss": 0.23362942039966583,
+ "eval_pr_auc": 0.7666566917414745,
+ "eval_precision": 0.7435014093329158,
+ "eval_precision_macro": 0.8496869717377781,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7655594969364721,
+ "eval_recall_macro": 0.8580619359493733,
+ "eval_runtime": 0.2158,
+ "eval_samples_per_second": 755.492,
+ "eval_steps_per_second": 4.635,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5220
+ },
+ {
+ "epoch": 291.0,
+ "eval_accuracy": 0.9219544437665244,
+ "eval_auc": 0.9499087646350263,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7554564282300462,
+ "eval_f1_macro": 0.8545121458733945,
+ "eval_loss": 0.23345860838890076,
+ "eval_pr_auc": 0.767032300253484,
+ "eval_precision": 0.746536523929471,
+ "eval_precision_macro": 0.8511363192046093,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8580007477284299,
+ "eval_runtime": 0.2582,
+ "eval_samples_per_second": 631.391,
+ "eval_steps_per_second": 3.874,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5238
+ },
+ {
+ "epoch": 292.0,
+ "eval_accuracy": 0.9226154159040065,
+ "eval_auc": 0.9500412726083274,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7567135549872123,
+ "eval_f1_macro": 0.8553519407342349,
+ "eval_loss": 0.23320625722408295,
+ "eval_pr_auc": 0.7678050059622479,
+ "eval_precision": 0.7502377179080824,
+ "eval_precision_macro": 0.8528939452496871,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.763302160593357,
+ "eval_recall_macro": 0.857868862635062,
+ "eval_runtime": 0.2022,
+ "eval_samples_per_second": 806.124,
+ "eval_steps_per_second": 4.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5256
+ },
+ {
+ "epoch": 293.0,
+ "eval_accuracy": 0.9219035997559487,
+ "eval_auc": 0.9500337688516315,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7551801083838062,
+ "eval_f1_macro": 0.8543609694420392,
+ "eval_loss": 0.23323103785514832,
+ "eval_pr_auc": 0.7676369094541509,
+ "eval_precision": 0.7466120390797353,
+ "eval_precision_macro": 0.8511174775574487,
+ "eval_pred_class_0": 16495,
+ "eval_pred_class_1": 3173,
+ "eval_predicted_binding_ratio": 0.16132804555623348,
+ "eval_recall": 0.7639471138342471,
+ "eval_recall_macro": 0.8577084515872508,
+ "eval_runtime": 0.2457,
+ "eval_samples_per_second": 663.373,
+ "eval_steps_per_second": 4.07,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5274
+ },
+ {
+ "epoch": 294.0,
+ "eval_accuracy": 0.9219035997559487,
+ "eval_auc": 0.9500722803500048,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7554140127388536,
+ "eval_f1_macro": 0.8544737053045671,
+ "eval_loss": 0.23318050801753998,
+ "eval_pr_auc": 0.7677669872107012,
+ "eval_precision": 0.7461465869770368,
+ "eval_precision_macro": 0.8509676473001504,
+ "eval_pred_class_0": 16489,
+ "eval_pred_class_1": 3179,
+ "eval_predicted_binding_ratio": 0.1616331096196868,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8581016250801203,
+ "eval_runtime": 0.2622,
+ "eval_samples_per_second": 621.765,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5292
+ },
+ {
+ "epoch": 295.0,
+ "eval_accuracy": 0.9223103518405532,
+ "eval_auc": 0.9501381693679445,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7562998405103668,
+ "eval_f1_macro": 0.8550446157127531,
+ "eval_loss": 0.23305083811283112,
+ "eval_pr_auc": 0.7681080957655504,
+ "eval_precision": 0.7481855474913222,
+ "eval_precision_macro": 0.8519702208636682,
+ "eval_pred_class_0": 16499,
+ "eval_pred_class_1": 3169,
+ "eval_predicted_binding_ratio": 0.16112466951393126,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8582120110832919,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.885,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5310
+ },
+ {
+ "epoch": 296.0,
+ "eval_accuracy": 0.9220052877770999,
+ "eval_auc": 0.9501577511558462,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7556546670914304,
+ "eval_f1_macro": 0.8546256879531203,
+ "eval_loss": 0.23304298520088196,
+ "eval_pr_auc": 0.7681639014007426,
+ "eval_precision": 0.7466163046899591,
+ "eval_precision_macro": 0.8512051870912047,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8581619860386525,
+ "eval_runtime": 0.2233,
+ "eval_samples_per_second": 730.092,
+ "eval_steps_per_second": 4.479,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5328
+ },
+ {
+ "epoch": 297.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9502301512155882,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7564981661616967,
+ "eval_f1_macro": 0.8551582014839937,
+ "eval_loss": 0.23290005326271057,
+ "eval_pr_auc": 0.7685493782671796,
+ "eval_precision": 0.7482649842271294,
+ "eval_precision_macro": 0.8520389050120978,
+ "eval_pred_class_0": 16498,
+ "eval_pred_class_1": 3170,
+ "eval_predicted_binding_ratio": 0.16117551352450682,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8583732493935144,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.312,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5346
+ },
+ {
+ "epoch": 298.0,
+ "eval_accuracy": 0.9224628838722798,
+ "eval_auc": 0.9503260649928105,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.756739511883873,
+ "eval_f1_macro": 0.8553105125875349,
+ "eval_loss": 0.23275841772556305,
+ "eval_pr_auc": 0.768979970846171,
+ "eval_precision": 0.7487373737373737,
+ "eval_precision_macro": 0.8522777777777777,
+ "eval_pred_class_0": 16500,
+ "eval_pred_class_1": 3168,
+ "eval_predicted_binding_ratio": 0.1610738255033557,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8584336103520465,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.056,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5364
+ },
+ {
+ "epoch": 299.0,
+ "eval_accuracy": 0.9224120398617043,
+ "eval_auc": 0.9503799246420391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7565411614550096,
+ "eval_f1_macro": 0.8551969143430849,
+ "eval_loss": 0.23264609277248383,
+ "eval_pr_auc": 0.7692428666566218,
+ "eval_precision": 0.748658035996211,
+ "eval_precision_macro": 0.8522091464751675,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.764592067075137,
+ "eval_recall_macro": 0.8582723720418239,
+ "eval_runtime": 0.2384,
+ "eval_samples_per_second": 683.765,
+ "eval_steps_per_second": 4.195,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5382
+ },
+ {
+ "epoch": 300.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9503983969250598,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7565758010521282,
+ "eval_f1_macro": 0.8551956221484214,
+ "eval_loss": 0.23263320326805115,
+ "eval_pr_auc": 0.7693119480202146,
+ "eval_precision": 0.748108448928121,
+ "eval_precision_macro": 0.8519882690809373,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7652370203160271,
+ "eval_recall_macro": 0.8585043072244709,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.523,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5400
+ },
+ {
+ "epoch": 301.0,
+ "eval_accuracy": 0.9225137278828553,
+ "eval_auc": 0.9504205091626904,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7572475310608474,
+ "eval_f1_macro": 0.855573369257207,
+ "eval_loss": 0.2325783669948578,
+ "eval_pr_auc": 0.7694562109808358,
+ "eval_precision": 0.7481901164620711,
+ "eval_precision_macro": 0.8521436908185075,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.859119079986095,
+ "eval_runtime": 0.2413,
+ "eval_samples_per_second": 675.443,
+ "eval_steps_per_second": 4.144,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5418
+ },
+ {
+ "epoch": 302.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9504706120673215,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.756808408982322,
+ "eval_f1_macro": 0.8553077347570655,
+ "eval_loss": 0.23251411318778992,
+ "eval_pr_auc": 0.7697272239104135,
+ "eval_precision": 0.7476400251730648,
+ "eval_precision_macro": 0.8518369925744038,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7662044501773622,
+ "eval_recall_macro": 0.8588974807173404,
+ "eval_runtime": 0.2708,
+ "eval_samples_per_second": 602.022,
+ "eval_steps_per_second": 3.693,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5436
+ },
+ {
+ "epoch": 303.0,
+ "eval_accuracy": 0.9223611958511287,
+ "eval_auc": 0.9504959749596038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7571178622554477,
+ "eval_f1_macro": 0.8554568705510044,
+ "eval_loss": 0.23248492181301117,
+ "eval_pr_auc": 0.7698215821647963,
+ "eval_precision": 0.7470182046453233,
+ "eval_precision_macro": 0.8516367567335341,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8594217120411665,
+ "eval_runtime": 0.2271,
+ "eval_samples_per_second": 717.802,
+ "eval_steps_per_second": 4.404,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5454
+ },
+ {
+ "epoch": 304.0,
+ "eval_accuracy": 0.9228187919463087,
+ "eval_auc": 0.9505587106478812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7579719387755102,
+ "eval_f1_macro": 0.8560304891070873,
+ "eval_loss": 0.23235370218753815,
+ "eval_pr_auc": 0.7701546218803492,
+ "eval_precision": 0.749605802585935,
+ "eval_precision_macro": 0.8528595176474563,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.8593001628616911,
+ "eval_runtime": 0.1849,
+ "eval_samples_per_second": 881.772,
+ "eval_steps_per_second": 5.41,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5472
+ },
+ {
+ "epoch": 305.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9506673837312365,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7583120204603581,
+ "eval_f1_macro": 0.8563023222011585,
+ "eval_loss": 0.23212042450904846,
+ "eval_pr_auc": 0.7706813722507476,
+ "eval_precision": 0.7518225039619651,
+ "eval_precision_macro": 0.8538377341465491,
+ "eval_pred_class_0": 16513,
+ "eval_pred_class_1": 3155,
+ "eval_predicted_binding_ratio": 0.1604128533658735,
+ "eval_recall": 0.7649145436955821,
+ "eval_recall_macro": 0.8588259565825047,
+ "eval_runtime": 0.1758,
+ "eval_samples_per_second": 927.397,
+ "eval_steps_per_second": 5.69,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5490
+ },
+ {
+ "epoch": 305.55555555555554,
+ "grad_norm": 15827.6396484375,
+ "learning_rate": 3.943376017723057e-07,
+ "loss": 0.1954,
+ "step": 5500
+ },
+ {
+ "epoch": 306.0,
+ "eval_accuracy": 0.9233272320520642,
+ "eval_auc": 0.9506862453142154,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7591823698498882,
+ "eval_f1_macro": 0.8567938214370079,
+ "eval_loss": 0.23211389780044556,
+ "eval_pr_auc": 0.7707434518060764,
+ "eval_precision": 0.7519772223979754,
+ "eval_precision_macro": 0.8540585209342515,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7665269267978072,
+ "eval_recall_macro": 0.8596019676543512,
+ "eval_runtime": 0.2349,
+ "eval_samples_per_second": 693.884,
+ "eval_steps_per_second": 4.257,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5508
+ },
+ {
+ "epoch": 307.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9506972041080411,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7588075880758808,
+ "eval_f1_macro": 0.8565232326853711,
+ "eval_loss": 0.23213696479797363,
+ "eval_pr_auc": 0.7706724583082463,
+ "eval_precision": 0.7503152585119798,
+ "eval_precision_macro": 0.8533038465207814,
+ "eval_pred_class_0": 16496,
+ "eval_pred_class_1": 3172,
+ "eval_predicted_binding_ratio": 0.16127720154565792,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8598442387508907,
+ "eval_runtime": 0.2681,
+ "eval_samples_per_second": 608.072,
+ "eval_steps_per_second": 3.731,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5526
+ },
+ {
+ "epoch": 308.0,
+ "eval_accuracy": 0.9229204799674599,
+ "eval_auc": 0.9507381389986547,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.758521822236381,
+ "eval_f1_macro": 0.8563315143004762,
+ "eval_loss": 0.23207640647888184,
+ "eval_pr_auc": 0.7708824410889222,
+ "eval_precision": 0.7494491658797607,
+ "eval_precision_macro": 0.8528944938003498,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8598847551440492,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.193,
+ "eval_steps_per_second": 3.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5544
+ },
+ {
+ "epoch": 309.0,
+ "eval_accuracy": 0.9233272320520642,
+ "eval_auc": 0.9508327778185136,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7593360995850622,
+ "eval_f1_macro": 0.8568679288369823,
+ "eval_loss": 0.2318853884935379,
+ "eval_pr_auc": 0.7713979747932131,
+ "eval_precision": 0.7516587677725118,
+ "eval_precision_macro": 0.8539545732457663,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7671718800386972,
+ "eval_recall_macro": 0.8598640833162641,
+ "eval_runtime": 0.3164,
+ "eval_samples_per_second": 515.146,
+ "eval_steps_per_second": 3.16,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5562
+ },
+ {
+ "epoch": 310.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9508632794702453,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7587747287811104,
+ "eval_f1_macro": 0.8565253830188363,
+ "eval_loss": 0.23183651268482208,
+ "eval_pr_auc": 0.7715329415149417,
+ "eval_precision": 0.7508683296495106,
+ "eval_precision_macro": 0.8535264016588866,
+ "eval_pred_class_0": 16501,
+ "eval_pred_class_1": 3167,
+ "eval_predicted_binding_ratio": 0.16102298149278016,
+ "eval_recall": 0.7668494034182521,
+ "eval_recall_macro": 0.8596123035682436,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.635,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5580
+ },
+ {
+ "epoch": 311.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9509030269959862,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7588844621513944,
+ "eval_f1_macro": 0.8565602855810055,
+ "eval_loss": 0.23182560503482819,
+ "eval_pr_auc": 0.7716106695707178,
+ "eval_precision": 0.7501575299306869,
+ "eval_precision_macro": 0.8532526463767658,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8599752965818471,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.932,
+ "eval_steps_per_second": 3.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5598
+ },
+ {
+ "epoch": 312.0,
+ "eval_accuracy": 0.9231238560097621,
+ "eval_auc": 0.9509431638216853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7591589678241478,
+ "eval_f1_macro": 0.8567105868221108,
+ "eval_loss": 0.23177149891853333,
+ "eval_pr_auc": 0.7718084005522707,
+ "eval_precision": 0.7500786905886057,
+ "eval_precision_macro": 0.853269895291271,
+ "eval_pred_class_0": 16491,
+ "eval_pred_class_1": 3177,
+ "eval_predicted_binding_ratio": 0.1615314215985357,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8602675927230261,
+ "eval_runtime": 0.2364,
+ "eval_samples_per_second": 689.439,
+ "eval_steps_per_second": 4.23,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5616
+ },
+ {
+ "epoch": 313.0,
+ "eval_accuracy": 0.9230730119991865,
+ "eval_auc": 0.9509867264870173,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7590380633858895,
+ "eval_f1_macro": 0.856634317411552,
+ "eval_loss": 0.23171813786029816,
+ "eval_pr_auc": 0.771954368377823,
+ "eval_precision": 0.749842668344871,
+ "eval_precision_macro": 0.8531505640086999,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8602374122437602,
+ "eval_runtime": 0.2592,
+ "eval_samples_per_second": 628.85,
+ "eval_steps_per_second": 3.858,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5634
+ },
+ {
+ "epoch": 314.0,
+ "eval_accuracy": 0.9228696359568843,
+ "eval_auc": 0.9509922740114228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7584010192705845,
+ "eval_f1_macro": 0.8562552937959844,
+ "eval_loss": 0.23173367977142334,
+ "eval_pr_auc": 0.7720376545527768,
+ "eval_precision": 0.7492133417243549,
+ "eval_precision_macro": 0.8527752578846153,
+ "eval_pred_class_0": 16490,
+ "eval_pred_class_1": 3178,
+ "eval_predicted_binding_ratio": 0.16158226560911124,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8598545746647831,
+ "eval_runtime": 0.2664,
+ "eval_samples_per_second": 611.795,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5652
+ },
+ {
+ "epoch": 315.0,
+ "eval_accuracy": 0.9233780760626398,
+ "eval_auc": 0.9510762654774227,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7595340673368438,
+ "eval_f1_macro": 0.8569813431425517,
+ "eval_loss": 0.23158977925777435,
+ "eval_pr_auc": 0.7725350282702966,
+ "eval_precision": 0.7517372078332281,
+ "eval_precision_macro": 0.8540227670483556,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8600253216264866,
+ "eval_runtime": 0.2525,
+ "eval_samples_per_second": 645.435,
+ "eval_steps_per_second": 3.96,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5670
+ },
+ {
+ "epoch": 316.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9511527434542277,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7601405301820504,
+ "eval_f1_macro": 0.8573636072933594,
+ "eval_loss": 0.23149563372135162,
+ "eval_pr_auc": 0.7728791764414321,
+ "eval_precision": 0.7529262891490035,
+ "eval_precision_macro": 0.8546239248495366,
+ "eval_pred_class_0": 16507,
+ "eval_pred_class_1": 3161,
+ "eval_predicted_binding_ratio": 0.1607179174293268,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8601762240228168,
+ "eval_runtime": 0.2078,
+ "eval_samples_per_second": 784.391,
+ "eval_steps_per_second": 4.812,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5688
+ },
+ {
+ "epoch": 317.0,
+ "eval_accuracy": 0.923581452104942,
+ "eval_auc": 0.9511918583675363,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7598657932577089,
+ "eval_f1_macro": 0.8572131820235396,
+ "eval_loss": 0.23143813014030457,
+ "eval_pr_auc": 0.7730966214358813,
+ "eval_precision": 0.7530082330588981,
+ "eval_precision_macro": 0.8546082958147307,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7668494034182521,
+ "eval_recall_macro": 0.8598839278816377,
+ "eval_runtime": 0.2098,
+ "eval_samples_per_second": 777.022,
+ "eval_steps_per_second": 4.767,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5706
+ },
+ {
+ "epoch": 318.0,
+ "eval_accuracy": 0.923479764083791,
+ "eval_auc": 0.9512240340090886,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7597765363128491,
+ "eval_f1_macro": 0.8571341935895835,
+ "eval_loss": 0.23142649233341217,
+ "eval_pr_auc": 0.7731516186784236,
+ "eval_precision": 0.7522123893805309,
+ "eval_precision_macro": 0.8542630051604545,
+ "eval_pred_class_0": 16504,
+ "eval_pred_class_1": 3164,
+ "eval_predicted_binding_ratio": 0.1608704494610535,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8600856825850187,
+ "eval_runtime": 0.2091,
+ "eval_samples_per_second": 779.633,
+ "eval_steps_per_second": 4.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5724
+ },
+ {
+ "epoch": 319.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9512825457928188,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7600638977635783,
+ "eval_f1_macro": 0.8573266640831436,
+ "eval_loss": 0.2313271462917328,
+ "eval_pr_auc": 0.7734563478045352,
+ "eval_precision": 0.7530864197530864,
+ "eval_precision_macro": 0.8546763493762101,
+ "eval_pred_class_0": 16509,
+ "eval_pred_class_1": 3159,
+ "eval_predicted_binding_ratio": 0.16061622940817571,
+ "eval_recall": 0.7671718800386972,
+ "eval_recall_macro": 0.8600451661918602,
+ "eval_runtime": 0.2154,
+ "eval_samples_per_second": 756.779,
+ "eval_steps_per_second": 4.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5742
+ },
+ {
+ "epoch": 320.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9513326194999532,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7605048729829046,
+ "eval_f1_macro": 0.8575931868618003,
+ "eval_loss": 0.23122872412204742,
+ "eval_pr_auc": 0.7736938128704516,
+ "eval_precision": 0.7536415452818239,
+ "eval_precision_macro": 0.8549855212781016,
+ "eval_pred_class_0": 16510,
+ "eval_pred_class_1": 3158,
+ "eval_predicted_binding_ratio": 0.16056538539760015,
+ "eval_recall": 0.7674943566591422,
+ "eval_recall_macro": 0.8602667654606148,
+ "eval_runtime": 0.235,
+ "eval_samples_per_second": 693.713,
+ "eval_steps_per_second": 4.256,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5760
+ },
+ {
+ "epoch": 321.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9513477438033324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7602171136653896,
+ "eval_f1_macro": 0.8574005258699469,
+ "eval_loss": 0.2312333732843399,
+ "eval_pr_auc": 0.773709338696213,
+ "eval_precision": 0.7527663610496365,
+ "eval_precision_macro": 0.8545716082739852,
+ "eval_pred_class_0": 16505,
+ "eval_pred_class_1": 3163,
+ "eval_predicted_binding_ratio": 0.16081960545047794,
+ "eval_recall": 0.7678168332795873,
+ "eval_recall_macro": 0.8603072818537733,
+ "eval_runtime": 0.2276,
+ "eval_samples_per_second": 716.105,
+ "eval_steps_per_second": 4.393,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5778
+ },
+ {
+ "epoch": 322.0,
+ "eval_accuracy": 0.9235306080943665,
+ "eval_auc": 0.9513562889374167,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7602040816326531,
+ "eval_f1_macro": 0.8573582711574831,
+ "eval_loss": 0.23121465742588043,
+ "eval_pr_auc": 0.7737540605936648,
+ "eval_precision": 0.7518133081046988,
+ "eval_precision_macro": 0.854175430193466,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7687842631409223,
+ "eval_recall_macro": 0.8606400943881107,
+ "eval_runtime": 0.2488,
+ "eval_samples_per_second": 655.047,
+ "eval_steps_per_second": 4.019,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5796
+ },
+ {
+ "epoch": 323.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9514191998106756,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7604914632200415,
+ "eval_f1_macro": 0.8575507604890313,
+ "eval_loss": 0.23111233115196228,
+ "eval_pr_auc": 0.7739781495758574,
+ "eval_precision": 0.7526847757422616,
+ "eval_precision_macro": 0.8545874490758332,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8605995779949522,
+ "eval_runtime": 0.2355,
+ "eval_samples_per_second": 692.067,
+ "eval_steps_per_second": 4.246,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5814
+ },
+ {
+ "epoch": 324.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9514517842171841,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7604914632200415,
+ "eval_f1_macro": 0.8575507604890313,
+ "eval_loss": 0.23104801774024963,
+ "eval_pr_auc": 0.7741130008089699,
+ "eval_precision": 0.7526847757422616,
+ "eval_precision_macro": 0.8545874490758332,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8605995779949522,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.295,
+ "eval_steps_per_second": 4.125,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5832
+ },
+ {
+ "epoch": 325.0,
+ "eval_accuracy": 0.9237339841366687,
+ "eval_auc": 0.951504651151519,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7606128311522502,
+ "eval_f1_macro": 0.857627250169412,
+ "eval_loss": 0.23095941543579102,
+ "eval_pr_auc": 0.7744096919390852,
+ "eval_precision": 0.7529225908372827,
+ "eval_precision_macro": 0.8547076748648027,
+ "eval_pred_class_0": 16503,
+ "eval_pred_class_1": 3165,
+ "eval_predicted_binding_ratio": 0.16092129347162903,
+ "eval_recall": 0.7684617865204773,
+ "eval_recall_macro": 0.8606297584742182,
+ "eval_runtime": 0.2175,
+ "eval_samples_per_second": 749.374,
+ "eval_steps_per_second": 4.597,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5850
+ },
+ {
+ "epoch": 326.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.951546208922066,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7606578317100431,
+ "eval_f1_macro": 0.8576669257120046,
+ "eval_loss": 0.23088191449642181,
+ "eval_pr_auc": 0.774641356281408,
+ "eval_precision": 0.7533206831119544,
+ "eval_precision_macro": 0.8548803827531177,
+ "eval_pred_class_0": 16506,
+ "eval_pred_class_1": 3162,
+ "eval_predicted_binding_ratio": 0.16076876143990237,
+ "eval_recall": 0.7681393099000322,
+ "eval_recall_macro": 0.8605288811225278,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.56,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5868
+ },
+ {
+ "epoch": 327.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9515728175742149,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7608105951811074,
+ "eval_f1_macro": 0.8577405662711912,
+ "eval_loss": 0.23083868622779846,
+ "eval_pr_auc": 0.7747608129205691,
+ "eval_precision": 0.7530006317119393,
+ "eval_precision_macro": 0.8547756764183257,
+ "eval_pred_class_0": 16502,
+ "eval_pred_class_1": 3166,
+ "eval_predicted_binding_ratio": 0.1609721374822046,
+ "eval_recall": 0.7687842631409223,
+ "eval_recall_macro": 0.8607909967844407,
+ "eval_runtime": 0.2492,
+ "eval_samples_per_second": 654.065,
+ "eval_steps_per_second": 4.013,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5886
+ },
+ {
+ "epoch": 328.0,
+ "eval_accuracy": 0.9238356721578198,
+ "eval_auc": 0.9516116113150578,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7611607142857143,
+ "eval_f1_macro": 0.8579273206076528,
+ "eval_loss": 0.23078228533267975,
+ "eval_pr_auc": 0.7749744562806883,
+ "eval_precision": 0.7527593818984547,
+ "eval_precision_macro": 0.8547393927131844,
+ "eval_pred_class_0": 16497,
+ "eval_pred_class_1": 3171,
+ "eval_predicted_binding_ratio": 0.16122635753508235,
+ "eval_recall": 0.7697516930022573,
+ "eval_recall_macro": 0.8612143507565763,
+ "eval_runtime": 0.2121,
+ "eval_samples_per_second": 768.541,
+ "eval_steps_per_second": 4.715,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5904
+ },
+ {
+ "epoch": 329.0,
+ "eval_accuracy": 0.9238865161683953,
+ "eval_auc": 0.9516614027797223,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7614342629482072,
+ "eval_f1_macro": 0.8580771629311073,
+ "eval_loss": 0.2307167798280716,
+ "eval_pr_auc": 0.7752067172424865,
+ "eval_precision": 0.7526780088216761,
+ "eval_precision_macro": 0.8547553982510223,
+ "eval_pred_class_0": 16494,
+ "eval_pred_class_1": 3174,
+ "eval_predicted_binding_ratio": 0.16137888956680904,
+ "eval_recall": 0.7703966462431474,
+ "eval_recall_macro": 0.8615066468977552,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.134,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5922
+ },
+ {
+ "epoch": 330.0,
+ "eval_accuracy": 0.9236322961155176,
+ "eval_auc": 0.9516455582714203,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7612841703750794,
+ "eval_f1_macro": 0.857914812460267,
+ "eval_loss": 0.23076769709587097,
+ "eval_pr_auc": 0.775112377066796,
+ "eval_precision": 0.750548417424005,
+ "eval_precision_macro": 0.8538504058352652,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7723315059658175,
+ "eval_recall_macro": 0.8621420914871643,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.508,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5940
+ },
+ {
+ "epoch": 331.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9516963035209824,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7621776504297995,
+ "eval_f1_macro": 0.8584894423868002,
+ "eval_loss": 0.23067235946655273,
+ "eval_pr_auc": 0.7753820734835624,
+ "eval_precision": 0.7525935240490412,
+ "eval_precision_macro": 0.8548556265844769,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.7720090293453724,
+ "eval_recall_macro": 0.8622524774903357,
+ "eval_runtime": 0.1824,
+ "eval_samples_per_second": 893.499,
+ "eval_steps_per_second": 5.482,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5958
+ },
+ {
+ "epoch": 332.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9517492385828104,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7619502868068834,
+ "eval_f1_macro": 0.8583798620967267,
+ "eval_loss": 0.23056790232658386,
+ "eval_pr_auc": 0.775607049366595,
+ "eval_precision": 0.7530708661417322,
+ "eval_precision_macro": 0.8550111500417023,
+ "eval_pred_class_0": 16493,
+ "eval_pred_class_1": 3175,
+ "eval_predicted_binding_ratio": 0.16142973357738458,
+ "eval_recall": 0.7710415994840374,
+ "eval_recall_macro": 0.8618593039974662,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.679,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5976
+ },
+ {
+ "epoch": 333.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9517777256072577,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7621475227019276,
+ "eval_f1_macro": 0.8584929210351648,
+ "eval_loss": 0.23053352534770966,
+ "eval_pr_auc": 0.7757600766000483,
+ "eval_precision": 0.7531486146095718,
+ "eval_precision_macro": 0.855079036870636,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7713640761044824,
+ "eval_recall_macro": 0.8620205423076888,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.883,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5994
+ },
+ {
+ "epoch": 333.3333333333333,
+ "grad_norm": 16736.6328125,
+ "learning_rate": 3.021381973636964e-07,
+ "loss": 0.1913,
+ "step": 6000
+ },
+ {
+ "epoch": 334.0,
+ "eval_accuracy": 0.9237848281472443,
+ "eval_auc": 0.9517933365355851,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616473207187152,
+ "eval_f1_macro": 0.8581438407085573,
+ "eval_loss": 0.23052088916301727,
+ "eval_pr_auc": 0.7758187649274527,
+ "eval_precision": 0.751254705144291,
+ "eval_precision_macro": 0.8542074496595242,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7723315059658175,
+ "eval_recall_macro": 0.8622326329249622,
+ "eval_runtime": 0.2573,
+ "eval_samples_per_second": 633.473,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6012
+ },
+ {
+ "epoch": 335.0,
+ "eval_accuracy": 0.9238356721578198,
+ "eval_auc": 0.9518365877609504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7619198982835347,
+ "eval_f1_macro": 0.8582932017746205,
+ "eval_loss": 0.23045583069324493,
+ "eval_pr_auc": 0.7760158372270667,
+ "eval_precision": 0.7511751801942964,
+ "eval_precision_macro": 0.8542244778801185,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8625249290661412,
+ "eval_runtime": 0.219,
+ "eval_samples_per_second": 744.186,
+ "eval_steps_per_second": 4.566,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6030
+ },
+ {
+ "epoch": 336.0,
+ "eval_accuracy": 0.9241915802318487,
+ "eval_auc": 0.9518931822423861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7624661462482077,
+ "eval_f1_macro": 0.8586824817571539,
+ "eval_loss": 0.23035065829753876,
+ "eval_pr_auc": 0.7762652864261658,
+ "eval_precision": 0.753463476070529,
+ "eval_precision_macro": 0.855266785330923,
+ "eval_pred_class_0": 16492,
+ "eval_pred_class_1": 3176,
+ "eval_predicted_binding_ratio": 0.16148057758796014,
+ "eval_recall": 0.7716865527249275,
+ "eval_recall_macro": 0.8622119610971773,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.881,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6048
+ },
+ {
+ "epoch": 337.0,
+ "eval_accuracy": 0.9240390482001221,
+ "eval_auc": 0.9519021264089276,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7623289850461342,
+ "eval_f1_macro": 0.8585623745200415,
+ "eval_loss": 0.2303379327058792,
+ "eval_pr_auc": 0.7763353590359,
+ "eval_precision": 0.752276295133438,
+ "eval_precision_macro": 0.8547524774823897,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7726539825862625,
+ "eval_recall_macro": 0.8625145931522488,
+ "eval_runtime": 0.2467,
+ "eval_samples_per_second": 660.828,
+ "eval_steps_per_second": 4.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6066
+ },
+ {
+ "epoch": 338.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9519062432559864,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616325234238527,
+ "eval_f1_macro": 0.8581006831532533,
+ "eval_loss": 0.23036180436611176,
+ "eval_pr_auc": 0.7763420780879606,
+ "eval_precision": 0.7503128911138923,
+ "eval_precision_macro": 0.8538172032062905,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7732989358271525,
+ "eval_recall_macro": 0.8625654454592997,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.319,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6084
+ },
+ {
+ "epoch": 339.0,
+ "eval_accuracy": 0.9237339841366687,
+ "eval_auc": 0.9519363653402588,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7618291521117815,
+ "eval_f1_macro": 0.8582134440261069,
+ "eval_loss": 0.23031854629516602,
+ "eval_pr_auc": 0.776475073481046,
+ "eval_precision": 0.7503909915545824,
+ "eval_precision_macro": 0.8538853142461151,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7736214124475975,
+ "eval_recall_macro": 0.8627266837695222,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.603,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6102
+ },
+ {
+ "epoch": 340.0,
+ "eval_accuracy": 0.9236831401260931,
+ "eval_auc": 0.9519665750170216,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7616325234238527,
+ "eval_f1_macro": 0.8581006831532533,
+ "eval_loss": 0.23026354610919952,
+ "eval_pr_auc": 0.7766276763039114,
+ "eval_precision": 0.7503128911138923,
+ "eval_precision_macro": 0.8538172032062905,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7732989358271525,
+ "eval_recall_macro": 0.8625654454592997,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.893,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6120
+ },
+ {
+ "epoch": 341.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9520129601070512,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.230192169547081,
+ "eval_pr_auc": 0.7768138361852162,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2426,
+ "eval_samples_per_second": 671.971,
+ "eval_steps_per_second": 4.123,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6138
+ },
+ {
+ "epoch": 342.0,
+ "eval_accuracy": 0.9239882041895465,
+ "eval_auc": 0.9520488340982072,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7622833518842423,
+ "eval_f1_macro": 0.8585223761569667,
+ "eval_loss": 0.2301386296749115,
+ "eval_pr_auc": 0.7769598633905366,
+ "eval_precision": 0.7518820577164367,
+ "eval_precision_macro": 0.8545818055572474,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626154705039393,
+ "eval_runtime": 0.2473,
+ "eval_samples_per_second": 659.185,
+ "eval_steps_per_second": 4.044,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6156
+ },
+ {
+ "epoch": 343.0,
+ "eval_accuracy": 0.9241407362212731,
+ "eval_auc": 0.952100825107636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7626471524021635,
+ "eval_f1_macro": 0.8587517153841377,
+ "eval_loss": 0.23005619645118713,
+ "eval_pr_auc": 0.7772456933395511,
+ "eval_precision": 0.7525902668759812,
+ "eval_precision_macro": 0.8549397976374689,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8627060119417373,
+ "eval_runtime": 0.2129,
+ "eval_samples_per_second": 765.55,
+ "eval_steps_per_second": 4.697,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6174
+ },
+ {
+ "epoch": 344.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9521250006350455,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.23000310361385345,
+ "eval_pr_auc": 0.7773917675410515,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.276,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6192
+ },
+ {
+ "epoch": 345.0,
+ "eval_accuracy": 0.9241915802318487,
+ "eval_auc": 0.9521700231752211,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7627684964200477,
+ "eval_f1_macro": 0.8588281984687149,
+ "eval_loss": 0.22992061078548431,
+ "eval_pr_auc": 0.777567865132197,
+ "eval_precision": 0.7528266331658291,
+ "eval_precision_macro": 0.8550592763014295,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8627361924210033,
+ "eval_runtime": 0.2684,
+ "eval_samples_per_second": 607.371,
+ "eval_steps_per_second": 3.726,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6210
+ },
+ {
+ "epoch": 346.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9521951914175242,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7624502784407319,
+ "eval_f1_macro": 0.8586388332084449,
+ "eval_loss": 0.22986458241939545,
+ "eval_pr_auc": 0.7777181268262345,
+ "eval_precision": 0.7525125628140703,
+ "eval_precision_macro": 0.8548719086819685,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7726539825862625,
+ "eval_recall_macro": 0.8625447736315148,
+ "eval_runtime": 0.2315,
+ "eval_samples_per_second": 704.221,
+ "eval_steps_per_second": 4.32,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6228
+ },
+ {
+ "epoch": 347.0,
+ "eval_accuracy": 0.9240898922106976,
+ "eval_auc": 0.9522022182817713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7625258469858438,
+ "eval_f1_macro": 0.8586752506435165,
+ "eval_loss": 0.22987791895866394,
+ "eval_pr_auc": 0.7777283636078421,
+ "eval_precision": 0.7523540489642184,
+ "eval_precision_macro": 0.8548203930053466,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7729764592067075,
+ "eval_recall_macro": 0.8626758314624713,
+ "eval_runtime": 0.2021,
+ "eval_samples_per_second": 806.414,
+ "eval_steps_per_second": 4.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6246
+ },
+ {
+ "epoch": 348.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.9522300142987927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7634340222575516,
+ "eval_f1_macro": 0.8592029398342167,
+ "eval_loss": 0.229818195104599,
+ "eval_pr_auc": 0.7778254023800715,
+ "eval_precision": 0.7529005957980558,
+ "eval_precision_macro": 0.8552111450378106,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7742663656884876,
+ "eval_recall_macro": 0.8633509651826273,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 672.887,
+ "eval_steps_per_second": 4.128,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6264
+ },
+ {
+ "epoch": 349.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9522554939810626,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.22978660464286804,
+ "eval_pr_auc": 0.7779049389173774,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.237,
+ "eval_samples_per_second": 687.696,
+ "eval_steps_per_second": 4.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6282
+ },
+ {
+ "epoch": 350.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9522950273918264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638270820089001,
+ "eval_f1_macro": 0.859428369717681,
+ "eval_loss": 0.2297380119562149,
+ "eval_pr_auc": 0.7780796039517833,
+ "eval_precision": 0.7530554685051708,
+ "eval_precision_macro": 0.855346694014678,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7749113189293776,
+ "eval_recall_macro": 0.8636734418030723,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.348,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6300
+ },
+ {
+ "epoch": 351.0,
+ "eval_accuracy": 0.9242932682529998,
+ "eval_auc": 0.9523093341652933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636883034438978,
+ "eval_f1_macro": 0.8593074482256571,
+ "eval_loss": 0.22972844541072845,
+ "eval_pr_auc": 0.7781517759374512,
+ "eval_precision": 0.751875,
+ "eval_precision_macro": 0.8548359697595336,
+ "eval_pred_class_0": 16468,
+ "eval_pred_class_1": 3200,
+ "eval_predicted_binding_ratio": 0.16270083384177345,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8639760738581439,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.342,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6318
+ },
+ {
+ "epoch": 352.0,
+ "eval_accuracy": 0.9242424242424242,
+ "eval_auc": 0.952317528929415,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7635671215487146,
+ "eval_f1_macro": 0.8592310391299909,
+ "eval_loss": 0.22972537577152252,
+ "eval_pr_auc": 0.7781868067856106,
+ "eval_precision": 0.7516401124648547,
+ "eval_precision_macro": 0.8547172445484534,
+ "eval_pred_class_0": 16467,
+ "eval_pred_class_1": 3201,
+ "eval_predicted_binding_ratio": 0.16275167785234898,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8639458933788778,
+ "eval_runtime": 0.234,
+ "eval_samples_per_second": 696.7,
+ "eval_steps_per_second": 4.274,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6336
+ },
+ {
+ "epoch": 353.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.9523571596651685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638095238095238,
+ "eval_f1_macro": 0.8593838755989138,
+ "eval_loss": 0.2296588122844696,
+ "eval_pr_auc": 0.7784014772150735,
+ "eval_precision": 0.7521100343857455,
+ "eval_precision_macro": 0.8549547682402951,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8640062543374099,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.953,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6354
+ },
+ {
+ "epoch": 354.0,
+ "eval_accuracy": 0.9243441122635754,
+ "eval_auc": 0.952381568772553,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638095238095238,
+ "eval_f1_macro": 0.8593838755989138,
+ "eval_loss": 0.22961482405662537,
+ "eval_pr_auc": 0.7785102930543456,
+ "eval_precision": 0.7521100343857455,
+ "eval_precision_macro": 0.8549547682402951,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8640062543374099,
+ "eval_runtime": 0.2671,
+ "eval_samples_per_second": 610.236,
+ "eval_steps_per_second": 3.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6372
+ },
+ {
+ "epoch": 355.0,
+ "eval_accuracy": 0.9242932682529998,
+ "eval_auc": 0.9523906199965831,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763838223632038,
+ "eval_f1_macro": 0.8593796791618457,
+ "eval_loss": 0.2295987904071808,
+ "eval_pr_auc": 0.7785825605072106,
+ "eval_precision": 0.7515605493133583,
+ "eval_precision_macro": 0.8547343562893321,
+ "eval_pred_class_0": 16464,
+ "eval_pred_class_1": 3204,
+ "eval_predicted_binding_ratio": 0.16290420988407567,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.8642381895200568,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.418,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6390
+ },
+ {
+ "epoch": 356.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9524596428791869,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636305833730727,
+ "eval_f1_macro": 0.8593156699585895,
+ "eval_loss": 0.229468435049057,
+ "eval_pr_auc": 0.7789122838326235,
+ "eval_precision": 0.7529780564263323,
+ "eval_precision_macro": 0.8552789299002641,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635122034928499,
+ "eval_runtime": 0.2641,
+ "eval_samples_per_second": 617.191,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6408
+ },
+ {
+ "epoch": 357.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9524861542063461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763751987281399,
+ "eval_f1_macro": 0.8593921831946546,
+ "eval_loss": 0.2294115126132965,
+ "eval_pr_auc": 0.7790142584142796,
+ "eval_precision": 0.7532141737221699,
+ "eval_precision_macro": 0.8553982756468123,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635423839721159,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.426,
+ "eval_steps_per_second": 3.911,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6426
+ },
+ {
+ "epoch": 358.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9524829619466881,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7639771283354511,
+ "eval_f1_macro": 0.8595006707052558,
+ "eval_loss": 0.2294154018163681,
+ "eval_pr_auc": 0.7789936192429844,
+ "eval_precision": 0.7527386541471048,
+ "eval_precision_macro": 0.8552438490185533,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7755562721702677,
+ "eval_recall_macro": 0.8639355574649854,
+ "eval_runtime": 0.2445,
+ "eval_samples_per_second": 666.738,
+ "eval_steps_per_second": 4.09,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6444
+ },
+ {
+ "epoch": 359.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9525149624032593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.2293538749217987,
+ "eval_pr_auc": 0.7791661873069065,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.132,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6462
+ },
+ {
+ "epoch": 360.0,
+ "eval_accuracy": 0.9243949562741509,
+ "eval_auc": 0.9525288798767679,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763780778395552,
+ "eval_f1_macro": 0.8593880436271214,
+ "eval_loss": 0.22932648658752441,
+ "eval_pr_auc": 0.7792072068588576,
+ "eval_precision": 0.7526612398246713,
+ "eval_precision_macro": 0.8551760733541227,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7752337955498226,
+ "eval_recall_macro": 0.8637743191547629,
+ "eval_runtime": 0.2714,
+ "eval_samples_per_second": 600.631,
+ "eval_steps_per_second": 3.685,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6480
+ },
+ {
+ "epoch": 361.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9525697758373857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.763751987281399,
+ "eval_f1_macro": 0.8593921831946546,
+ "eval_loss": 0.22925521433353424,
+ "eval_pr_auc": 0.77936321808712,
+ "eval_precision": 0.7532141737221699,
+ "eval_precision_macro": 0.8553982756468123,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7745888423089327,
+ "eval_recall_macro": 0.8635423839721159,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.66,
+ "eval_steps_per_second": 4.016,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6498
+ },
+ {
+ "epoch": 361.1111111111111,
+ "grad_norm": 21180.3203125,
+ "learning_rate": 2.1735650901333336e-07,
+ "loss": 0.1893,
+ "step": 6500
+ },
+ {
+ "epoch": 362.0,
+ "eval_accuracy": 0.9244458002847264,
+ "eval_auc": 0.9526172898972942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636016544702513,
+ "eval_f1_macro": 0.8593197379764267,
+ "eval_loss": 0.22917793691158295,
+ "eval_pr_auc": 0.7795955328857882,
+ "eval_precision": 0.7535321821036107,
+ "eval_precision_macro": 0.8555017581027062,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8632802683102028,
+ "eval_runtime": 0.208,
+ "eval_samples_per_second": 783.548,
+ "eval_steps_per_second": 4.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6516
+ },
+ {
+ "epoch": 363.0,
+ "eval_accuracy": 0.924496644295302,
+ "eval_auc": 0.9526382926300437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7636479388826993,
+ "eval_f1_macro": 0.8593600478608577,
+ "eval_loss": 0.22912409901618958,
+ "eval_pr_auc": 0.7796869947527124,
+ "eval_precision": 0.7539283469516027,
+ "eval_precision_macro": 0.8556733812884909,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7736214124475975,
+ "eval_recall_macro": 0.8631793909585124,
+ "eval_runtime": 0.2657,
+ "eval_samples_per_second": 613.513,
+ "eval_steps_per_second": 3.764,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6534
+ },
+ {
+ "epoch": 364.0,
+ "eval_accuracy": 0.924496644295302,
+ "eval_auc": 0.952662049659998,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7637231503579952,
+ "eval_f1_macro": 0.859396294249525,
+ "eval_loss": 0.22910362482070923,
+ "eval_pr_auc": 0.779788701256993,
+ "eval_precision": 0.7537688442211056,
+ "eval_precision_macro": 0.8556213791598126,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8633104487894689,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.175,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6552
+ },
+ {
+ "epoch": 365.0,
+ "eval_accuracy": 0.9245474883058775,
+ "eval_auc": 0.9526903420344663,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7638446849140674,
+ "eval_f1_macro": 0.8594728689002142,
+ "eval_loss": 0.2290574461221695,
+ "eval_pr_auc": 0.7799354479263911,
+ "eval_precision": 0.7540056550424128,
+ "eval_precision_macro": 0.8557410744123195,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7739438890680426,
+ "eval_recall_macro": 0.8633406292687349,
+ "eval_runtime": 0.2049,
+ "eval_samples_per_second": 795.329,
+ "eval_steps_per_second": 4.879,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6570
+ },
+ {
+ "epoch": 366.0,
+ "eval_accuracy": 0.9245474883058775,
+ "eval_auc": 0.9527021767531981,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7640699523052464,
+ "eval_f1_macro": 0.8595814265550925,
+ "eval_loss": 0.22903695702552795,
+ "eval_pr_auc": 0.7799769457420497,
+ "eval_precision": 0.753527751646284,
+ "eval_precision_macro": 0.8555854062558139,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7749113189293776,
+ "eval_recall_macro": 0.8637338027616044,
+ "eval_runtime": 0.2118,
+ "eval_samples_per_second": 769.509,
+ "eval_steps_per_second": 4.721,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6588
+ },
+ {
+ "epoch": 367.0,
+ "eval_accuracy": 0.9247000203376042,
+ "eval_auc": 0.9527121622971282,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.764659145081837,
+ "eval_f1_macro": 0.8599193797618125,
+ "eval_loss": 0.22902432084083557,
+ "eval_pr_auc": 0.7800307850119022,
+ "eval_precision": 0.7537593984962406,
+ "eval_precision_macro": 0.8557884149558164,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.864217517692272,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.072,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6606
+ },
+ {
+ "epoch": 368.0,
+ "eval_accuracy": 0.9248017083587553,
+ "eval_auc": 0.9527291941703031,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7649769585253456,
+ "eval_f1_macro": 0.8601085500794873,
+ "eval_loss": 0.22899393737316132,
+ "eval_pr_auc": 0.7800749822284204,
+ "eval_precision": 0.7540726817042607,
+ "eval_precision_macro": 0.85597540373147,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644089364817604,
+ "eval_runtime": 0.2331,
+ "eval_samples_per_second": 699.217,
+ "eval_steps_per_second": 4.29,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6624
+ },
+ {
+ "epoch": 369.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527588782921224,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650238473767885,
+ "eval_f1_macro": 0.8601491566364061,
+ "eval_loss": 0.22894835472106934,
+ "eval_pr_auc": 0.780237083741907,
+ "eval_precision": 0.7544684854186265,
+ "eval_precision_macro": 0.856146798082819,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.86430805913007,
+ "eval_runtime": 0.2655,
+ "eval_samples_per_second": 613.902,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6642
+ },
+ {
+ "epoch": 370.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527729125556185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650238473767885,
+ "eval_f1_macro": 0.8601491566364061,
+ "eval_loss": 0.22892294824123383,
+ "eval_pr_auc": 0.7803195480022762,
+ "eval_precision": 0.7544684854186265,
+ "eval_precision_macro": 0.856146798082819,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.86430805913007,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.873,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6660
+ },
+ {
+ "epoch": 371.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9527883872289603,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7650985378258105,
+ "eval_f1_macro": 0.8601851483463878,
+ "eval_loss": 0.22889479994773865,
+ "eval_pr_auc": 0.7804143746656889,
+ "eval_precision": 0.7543089940457537,
+ "eval_precision_macro": 0.8560948381043845,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644391169610264,
+ "eval_runtime": 0.2368,
+ "eval_samples_per_second": 688.489,
+ "eval_steps_per_second": 4.224,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6678
+ },
+ {
+ "epoch": 372.0,
+ "eval_accuracy": 0.924954240390482,
+ "eval_auc": 0.9528159885960027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7653418124006359,
+ "eval_f1_macro": 0.860338399996844,
+ "eval_loss": 0.22885586321353912,
+ "eval_pr_auc": 0.7805625189044384,
+ "eval_precision": 0.7547820633427407,
+ "eval_precision_macro": 0.8563339286918206,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8644994779195585,
+ "eval_runtime": 0.1849,
+ "eval_samples_per_second": 881.352,
+ "eval_steps_per_second": 5.407,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6696
+ },
+ {
+ "epoch": 373.0,
+ "eval_accuracy": 0.9250559284116331,
+ "eval_auc": 0.9528519015171544,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7655852417302799,
+ "eval_f1_macro": 0.8604917251982311,
+ "eval_loss": 0.22878196835517883,
+ "eval_pr_auc": 0.7807742707975688,
+ "eval_precision": 0.7552557263884531,
+ "eval_precision_macro": 0.8565733155332836,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8645598388780904,
+ "eval_runtime": 0.2727,
+ "eval_samples_per_second": 597.724,
+ "eval_steps_per_second": 3.667,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6714
+ },
+ {
+ "epoch": 374.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9528673372605004,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657070144743121,
+ "eval_f1_macro": 0.8605684154038177,
+ "eval_loss": 0.22877708077430725,
+ "eval_pr_auc": 0.7808321396342274,
+ "eval_precision": 0.7554927809165097,
+ "eval_precision_macro": 0.85669312022406,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8645900193573565,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.842,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6732
+ },
+ {
+ "epoch": 375.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9528682131854067,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659303988558716,
+ "eval_f1_macro": 0.8606760610325117,
+ "eval_loss": 0.2287902534008026,
+ "eval_pr_auc": 0.7808487230478494,
+ "eval_precision": 0.7550125313283208,
+ "eval_precision_macro": 0.8565363700584309,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8649831928502261,
+ "eval_runtime": 0.1856,
+ "eval_samples_per_second": 878.251,
+ "eval_steps_per_second": 5.388,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6750
+ },
+ {
+ "epoch": 376.0,
+ "eval_accuracy": 0.9250050844010576,
+ "eval_auc": 0.9528953279275011,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7655380702591003,
+ "eval_f1_macro": 0.8604509839871686,
+ "eval_loss": 0.228745236992836,
+ "eval_pr_auc": 0.7809833435589818,
+ "eval_precision": 0.754858934169279,
+ "eval_precision_macro": 0.8564014297014619,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864660716229781,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.614,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6768
+ },
+ {
+ "epoch": 377.0,
+ "eval_accuracy": 0.9248017083587553,
+ "eval_auc": 0.9529040482465667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7651262505955216,
+ "eval_f1_macro": 0.8601804865980422,
+ "eval_loss": 0.2287396788597107,
+ "eval_pr_auc": 0.781048839864553,
+ "eval_precision": 0.7537546933667084,
+ "eval_precision_macro": 0.8558720042841312,
+ "eval_pred_class_0": 16472,
+ "eval_pred_class_1": 3196,
+ "eval_predicted_binding_ratio": 0.16249745779947122,
+ "eval_recall": 0.7768461786520477,
+ "eval_recall_macro": 0.8646710521436733,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.891,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6786
+ },
+ {
+ "epoch": 378.0,
+ "eval_accuracy": 0.9248525523693308,
+ "eval_auc": 0.9529144620204507,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7653968253968254,
+ "eval_f1_macro": 0.8603288764349426,
+ "eval_loss": 0.22873102128505707,
+ "eval_pr_auc": 0.7810643100928254,
+ "eval_precision": 0.7536730228196311,
+ "eval_precision_macro": 0.8558880628094148,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8649633482848524,
+ "eval_runtime": 0.1843,
+ "eval_samples_per_second": 884.276,
+ "eval_steps_per_second": 5.425,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6804
+ },
+ {
+ "epoch": 379.0,
+ "eval_accuracy": 0.924954240390482,
+ "eval_auc": 0.9529275424990492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657142857142857,
+ "eval_f1_macro": 0.8605178766021483,
+ "eval_loss": 0.22870197892189026,
+ "eval_pr_auc": 0.7811376412515475,
+ "eval_precision": 0.7539856205064083,
+ "eval_precision_macro": 0.8560747217232387,
+ "eval_pred_class_0": 16469,
+ "eval_pred_class_1": 3199,
+ "eval_predicted_binding_ratio": 0.16264998983119788,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8651547670743409,
+ "eval_runtime": 0.2724,
+ "eval_samples_per_second": 598.424,
+ "eval_steps_per_second": 3.671,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6822
+ },
+ {
+ "epoch": 380.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9529545793811519,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7660047656870532,
+ "eval_f1_macro": 0.8607118952674847,
+ "eval_loss": 0.22864677011966705,
+ "eval_pr_auc": 0.7812048860219004,
+ "eval_precision": 0.7548528490920476,
+ "eval_precision_macro": 0.8564843339790698,
+ "eval_pred_class_0": 16474,
+ "eval_pred_class_1": 3194,
+ "eval_predicted_binding_ratio": 0.16239576977832013,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8651142506811824,
+ "eval_runtime": 0.1856,
+ "eval_samples_per_second": 878.062,
+ "eval_steps_per_second": 5.387,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6840
+ },
+ {
+ "epoch": 381.0,
+ "eval_accuracy": 0.9250559284116331,
+ "eval_auc": 0.9529760687388493,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658087067047982,
+ "eval_f1_macro": 0.8605994081311654,
+ "eval_loss": 0.2286224663257599,
+ "eval_pr_auc": 0.7813066136655398,
+ "eval_precision": 0.7547760726589414,
+ "eval_precision_macro": 0.8564168678924449,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.86495301237096,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.403,
+ "eval_steps_per_second": 3.966,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6858
+ },
+ {
+ "epoch": 382.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9530162639595422,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7656324582338903,
+ "eval_f1_macro": 0.8605324858111449,
+ "eval_loss": 0.22855480015277863,
+ "eval_pr_auc": 0.7815011919374028,
+ "eval_precision": 0.7556532663316583,
+ "eval_precision_macro": 0.856745584876579,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8644589615264,
+ "eval_runtime": 0.267,
+ "eval_samples_per_second": 610.397,
+ "eval_steps_per_second": 3.745,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6876
+ },
+ {
+ "epoch": 383.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530368384623376,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658288259624563,
+ "eval_f1_macro": 0.8606451240251011,
+ "eval_loss": 0.22851014137268066,
+ "eval_pr_auc": 0.7815937686262128,
+ "eval_precision": 0.7557299843014129,
+ "eval_precision_macro": 0.8568129991882603,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646201998366225,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.766,
+ "eval_steps_per_second": 3.729,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6894
+ },
+ {
+ "epoch": 384.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530639240069352,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7658288259624563,
+ "eval_f1_macro": 0.8606451240251011,
+ "eval_loss": 0.22846660017967224,
+ "eval_pr_auc": 0.7817366980630457,
+ "eval_precision": 0.7557299843014129,
+ "eval_precision_macro": 0.8568129991882603,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646201998366225,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.734,
+ "eval_steps_per_second": 3.986,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6912
+ },
+ {
+ "epoch": 385.0,
+ "eval_accuracy": 0.9251067724222086,
+ "eval_auc": 0.9530865131370146,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7656324582338903,
+ "eval_f1_macro": 0.8605324858111449,
+ "eval_loss": 0.22843268513679504,
+ "eval_pr_auc": 0.7818405401720232,
+ "eval_precision": 0.7556532663316583,
+ "eval_precision_macro": 0.856745584876579,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8644589615264,
+ "eval_runtime": 0.2627,
+ "eval_samples_per_second": 620.559,
+ "eval_steps_per_second": 3.807,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6930
+ },
+ {
+ "epoch": 386.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9530828926474026,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659033078880407,
+ "eval_f1_macro": 0.8606810172942987,
+ "eval_loss": 0.22846029698848724,
+ "eval_pr_auc": 0.781765942321457,
+ "eval_precision": 0.7555695010982115,
+ "eval_precision_macro": 0.8567605408530922,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864751257667579,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.151,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6948
+ },
+ {
+ "epoch": 387.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9530829315773984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7665184243964421,
+ "eval_f1_macro": 0.8610134494863566,
+ "eval_loss": 0.22847168147563934,
+ "eval_pr_auc": 0.7818060776753192,
+ "eval_precision": 0.7552425665101722,
+ "eval_precision_macro": 0.8567386267869261,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8654669077808935,
+ "eval_runtime": 0.2089,
+ "eval_samples_per_second": 780.429,
+ "eval_steps_per_second": 4.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6966
+ },
+ {
+ "epoch": 388.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.953101403860419,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668838391863976,
+ "eval_f1_macro": 0.861243571985536,
+ "eval_loss": 0.2284410148859024,
+ "eval_pr_auc": 0.7818375459402719,
+ "eval_precision": 0.7559523809523809,
+ "eval_precision_macro": 0.8570973363853918,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8655574492186915,
+ "eval_runtime": 0.2357,
+ "eval_samples_per_second": 691.702,
+ "eval_steps_per_second": 4.244,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6984
+ },
+ {
+ "epoch": 388.8888888888889,
+ "grad_norm": 17393.9921875,
+ "learning_rate": 1.4317094954644378e-07,
+ "loss": 0.1876,
+ "step": 7000
+ },
+ {
+ "epoch": 389.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9531112726143616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671537484116899,
+ "eval_f1_macro": 0.8613916441816318,
+ "eval_loss": 0.22843530774116516,
+ "eval_pr_auc": 0.7818710932290109,
+ "eval_precision": 0.755868544600939,
+ "eval_precision_macro": 0.8571123212290193,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8658497453598706,
+ "eval_runtime": 0.2628,
+ "eval_samples_per_second": 620.309,
+ "eval_steps_per_second": 3.806,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7002
+ },
+ {
+ "epoch": 390.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.9531541734697645,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7664175544601686,
+ "eval_f1_macro": 0.8609828565716282,
+ "eval_loss": 0.2283545583486557,
+ "eval_pr_auc": 0.7820972392670548,
+ "eval_precision": 0.7559598494353826,
+ "eval_precision_macro": 0.8570151188924486,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651039147672901,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.253,
+ "eval_steps_per_second": 3.854,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7020
+ },
+ {
+ "epoch": 391.0,
+ "eval_accuracy": 0.9253609924750864,
+ "eval_auc": 0.9531671274258764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7665394402035624,
+ "eval_f1_macro": 0.8610596014864338,
+ "eval_loss": 0.22832486033439636,
+ "eval_pr_auc": 0.7821584295330316,
+ "eval_precision": 0.7561970505177282,
+ "eval_precision_macro": 0.8571349914927091,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.865134095246556,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.118,
+ "eval_steps_per_second": 4.123,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7038
+ },
+ {
+ "epoch": 392.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.953186076601346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661469933184856,
+ "eval_f1_macro": 0.8608344648891975,
+ "eval_loss": 0.22829268872737885,
+ "eval_pr_auc": 0.7822636305739935,
+ "eval_precision": 0.756043956043956,
+ "eval_precision_macro": 0.8570003193433394,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864811618626111,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.357,
+ "eval_steps_per_second": 4.149,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7056
+ },
+ {
+ "epoch": 393.0,
+ "eval_accuracy": 0.9252084604433598,
+ "eval_auc": 0.9532213958400613,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.765950676213206,
+ "eval_f1_macro": 0.860721851071415,
+ "eval_loss": 0.2282164841890335,
+ "eval_pr_auc": 0.782458419213003,
+ "eval_precision": 0.7559673366834171,
+ "eval_precision_macro": 0.8569329524960401,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646503803158885,
+ "eval_runtime": 0.21,
+ "eval_samples_per_second": 776.053,
+ "eval_steps_per_second": 4.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7074
+ },
+ {
+ "epoch": 394.0,
+ "eval_accuracy": 0.9251576164327843,
+ "eval_auc": 0.9532409095004704,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7657542966263526,
+ "eval_f1_macro": 0.8606092068875439,
+ "eval_loss": 0.22819304466247559,
+ "eval_pr_auc": 0.7825471573946872,
+ "eval_precision": 0.7558906691800189,
+ "eval_precision_macro": 0.8568655651025967,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.864489142005666,
+ "eval_runtime": 0.1952,
+ "eval_samples_per_second": 835.24,
+ "eval_steps_per_second": 5.124,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7092
+ },
+ {
+ "epoch": 395.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532573574237078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7659980897803247,
+ "eval_f1_macro": 0.8607627043564902,
+ "eval_loss": 0.2281719297170639,
+ "eval_pr_auc": 0.7826366824043385,
+ "eval_precision": 0.7563659226658284,
+ "eval_precision_macro": 0.8571057489837905,
+ "eval_pred_class_0": 16487,
+ "eval_pred_class_1": 3181,
+ "eval_predicted_binding_ratio": 0.1617347976408379,
+ "eval_recall": 0.7758787487907127,
+ "eval_recall_macro": 0.8645495029641981,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.649,
+ "eval_steps_per_second": 4.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7110
+ },
+ {
+ "epoch": 396.0,
+ "eval_accuracy": 0.9252084604433598,
+ "eval_auc": 0.9532628173556228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.765950676213206,
+ "eval_f1_macro": 0.860721851071415,
+ "eval_loss": 0.22817298769950867,
+ "eval_pr_auc": 0.7826246984855115,
+ "eval_precision": 0.7559673366834171,
+ "eval_precision_macro": 0.8569329524960401,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646503803158885,
+ "eval_runtime": 0.2265,
+ "eval_samples_per_second": 719.545,
+ "eval_steps_per_second": 4.414,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7128
+ },
+ {
+ "epoch": 397.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532649585053934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7662213740458015,
+ "eval_f1_macro": 0.8608703093903662,
+ "eval_loss": 0.22816696763038635,
+ "eval_pr_auc": 0.7826996340764835,
+ "eval_precision": 0.7558832758079699,
+ "eval_precision_macro": 0.8569477661729006,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7768461786520477,
+ "eval_recall_macro": 0.8649426764570676,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.822,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7146
+ },
+ {
+ "epoch": 398.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9532878980054353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661469933184856,
+ "eval_f1_macro": 0.8608344648891975,
+ "eval_loss": 0.22812943160533905,
+ "eval_pr_auc": 0.7828570635819191,
+ "eval_precision": 0.756043956043956,
+ "eval_precision_macro": 0.8570003193433394,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864811618626111,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.707,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7164
+ },
+ {
+ "epoch": 399.0,
+ "eval_accuracy": 0.9252593044539353,
+ "eval_auc": 0.9533086671582098,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7660725652450668,
+ "eval_f1_macro": 0.860798596552099,
+ "eval_loss": 0.2281065434217453,
+ "eval_pr_auc": 0.7829274286728394,
+ "eval_precision": 0.7562048382029531,
+ "eval_precision_macro": 0.8570529802176428,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8646805607951544,
+ "eval_runtime": 0.2483,
+ "eval_samples_per_second": 656.497,
+ "eval_steps_per_second": 4.028,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7182
+ },
+ {
+ "epoch": 400.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.953327003186245,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7661944930765557,
+ "eval_f1_macro": 0.8608753604764983,
+ "eval_loss": 0.22807644307613373,
+ "eval_pr_auc": 0.7830440116061308,
+ "eval_precision": 0.7564424890006285,
+ "eval_precision_macro": 0.8571730824234005,
+ "eval_pred_class_0": 16486,
+ "eval_pred_class_1": 3182,
+ "eval_predicted_binding_ratio": 0.16178564165141346,
+ "eval_recall": 0.7762012254111577,
+ "eval_recall_macro": 0.8647107412744205,
+ "eval_runtime": 0.221,
+ "eval_samples_per_second": 737.532,
+ "eval_steps_per_second": 4.525,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7200
+ },
+ {
+ "epoch": 401.0,
+ "eval_accuracy": 0.9253101484645109,
+ "eval_auc": 0.9533311297658027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7664175544601686,
+ "eval_f1_macro": 0.8609828565716282,
+ "eval_loss": 0.22808308899402618,
+ "eval_pr_auc": 0.7830491617637381,
+ "eval_precision": 0.7559598494353826,
+ "eval_precision_macro": 0.8570151188924486,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651039147672901,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.406,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7218
+ },
+ {
+ "epoch": 402.0,
+ "eval_accuracy": 0.9253609924750864,
+ "eval_auc": 0.9533512371086481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766390833863781,
+ "eval_f1_macro": 0.8609879862166538,
+ "eval_loss": 0.2280474752187729,
+ "eval_pr_auc": 0.7831121530747555,
+ "eval_precision": 0.7565190072258875,
+ "eval_precision_macro": 0.857240395332689,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7765237020316027,
+ "eval_recall_macro": 0.864871979584643,
+ "eval_runtime": 0.2425,
+ "eval_samples_per_second": 672.294,
+ "eval_steps_per_second": 4.125,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7236
+ },
+ {
+ "epoch": 403.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533569987480307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.22804181277751923,
+ "eval_pr_auc": 0.7831234273165448,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.256,
+ "eval_samples_per_second": 636.798,
+ "eval_steps_per_second": 3.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7254
+ },
+ {
+ "epoch": 404.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9533826536152816,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669053301511536,
+ "eval_f1_macro": 0.861289946852225,
+ "eval_loss": 0.2279965728521347,
+ "eval_pr_auc": 0.78327266335354,
+ "eval_precision": 0.7569095477386935,
+ "eval_precision_macro": 0.8574950553544232,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8652246366843541,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.619,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7272
+ },
+ {
+ "epoch": 405.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533846682425657,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.22799374163150787,
+ "eval_pr_auc": 0.7832776890714148,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.577,
+ "eval_steps_per_second": 3.801,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7290
+ },
+ {
+ "epoch": 406.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533931647141554,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.766783328030544,
+ "eval_f1_macro": 0.86121314661739,
+ "eval_loss": 0.2279902696609497,
+ "eval_pr_auc": 0.7833151045367318,
+ "eval_precision": 0.7566718995290423,
+ "eval_precision_macro": 0.8573749596534976,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8651944562050881,
+ "eval_runtime": 0.2498,
+ "eval_samples_per_second": 652.534,
+ "eval_steps_per_second": 4.003,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7308
+ },
+ {
+ "epoch": 407.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9533809990904591,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7673235855054037,
+ "eval_f1_macro": 0.8615095109466251,
+ "eval_loss": 0.22801372408866882,
+ "eval_pr_auc": 0.7832695956978404,
+ "eval_precision": 0.7565026637417738,
+ "eval_precision_macro": 0.8574040902613707,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8657790484874461,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.354,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7326
+ },
+ {
+ "epoch": 408.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9533805708605049,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671058898237816,
+ "eval_f1_macro": 0.8613505657612415,
+ "eval_loss": 0.2280135303735733,
+ "eval_pr_auc": 0.7832983031509244,
+ "eval_precision": 0.7554721701063164,
+ "eval_precision_macro": 0.8569406995036744,
+ "eval_pred_class_0": 16470,
+ "eval_pred_class_1": 3198,
+ "eval_predicted_binding_ratio": 0.16259914582062232,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.865950622711561,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.9,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7344
+ },
+ {
+ "epoch": 409.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9533953253289238,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7672276913305811,
+ "eval_f1_macro": 0.8614272726281818,
+ "eval_loss": 0.22799338400363922,
+ "eval_pr_auc": 0.7833637706556547,
+ "eval_precision": 0.7557084766969033,
+ "eval_precision_macro": 0.857060115344384,
+ "eval_pred_class_0": 16471,
+ "eval_pred_class_1": 3197,
+ "eval_predicted_binding_ratio": 0.16254830181004679,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.865980803190827,
+ "eval_runtime": 0.2998,
+ "eval_samples_per_second": 543.699,
+ "eval_steps_per_second": 3.336,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7362
+ },
+ {
+ "epoch": 410.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9534269948805303,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7672496025437202,
+ "eval_f1_macro": 0.8614738601594714,
+ "eval_loss": 0.22793784737586975,
+ "eval_pr_auc": 0.783492627588611,
+ "eval_precision": 0.7566635308874256,
+ "eval_precision_macro": 0.8574567123458305,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8656479906564896,
+ "eval_runtime": 0.2396,
+ "eval_samples_per_second": 680.309,
+ "eval_steps_per_second": 4.174,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7380
+ },
+ {
+ "epoch": 411.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9534421775789035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670535856256957,
+ "eval_f1_macro": 0.8613613920200376,
+ "eval_loss": 0.22790838778018951,
+ "eval_pr_auc": 0.7835790311478947,
+ "eval_precision": 0.7565872020075283,
+ "eval_precision_macro": 0.8573894747901719,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865486752346267,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.852,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7398
+ },
+ {
+ "epoch": 412.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9534503918080233,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670535856256957,
+ "eval_f1_macro": 0.8613613920200376,
+ "eval_loss": 0.22789432108402252,
+ "eval_pr_auc": 0.7836065049249683,
+ "eval_precision": 0.7565872020075283,
+ "eval_precision_macro": 0.8573894747901719,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865486752346267,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.102,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7416
+ },
+ {
+ "epoch": 413.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9534621486667634,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669316375198728,
+ "eval_f1_macro": 0.8612846167990333,
+ "eval_loss": 0.2278737723827362,
+ "eval_pr_auc": 0.7836530325514856,
+ "eval_precision": 0.7563499529633114,
+ "eval_precision_macro": 0.857269581736829,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8654565718670011,
+ "eval_runtime": 0.2015,
+ "eval_samples_per_second": 809.132,
+ "eval_steps_per_second": 4.964,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7434
+ },
+ {
+ "epoch": 414.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9534808739947569,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668575063613231,
+ "eval_f1_macro": 0.8612488935825013,
+ "eval_loss": 0.22784681618213654,
+ "eval_pr_auc": 0.7837271248134856,
+ "eval_precision": 0.7565108252274867,
+ "eval_precision_macro": 0.8573222168125176,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653255140360445,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 608.938,
+ "eval_steps_per_second": 3.736,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7452
+ },
+ {
+ "epoch": 415.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535030056973854,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.2278076857328415,
+ "eval_pr_auc": 0.783829043372685,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2388,
+ "eval_samples_per_second": 682.596,
+ "eval_steps_per_second": 4.188,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7470
+ },
+ {
+ "epoch": 416.0,
+ "eval_accuracy": 0.9255135245068131,
+ "eval_auc": 0.9535103634665969,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669053301511536,
+ "eval_f1_macro": 0.861289946852225,
+ "eval_loss": 0.22778868675231934,
+ "eval_pr_auc": 0.7838714010488458,
+ "eval_precision": 0.7569095477386935,
+ "eval_precision_macro": 0.8574950553544232,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7771686552724928,
+ "eval_recall_macro": 0.8652246366843541,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.742,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7488
+ },
+ {
+ "epoch": 416.6666666666667,
+ "grad_norm": 16683.39453125,
+ "learning_rate": 8.236268949930852e-08,
+ "loss": 0.186,
+ "step": 7500
+ },
+ {
+ "epoch": 417.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535206409854957,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22777557373046875,
+ "eval_pr_auc": 0.7839160806088992,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.595,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7506
+ },
+ {
+ "epoch": 418.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.953516261360965,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22778432071208954,
+ "eval_pr_auc": 0.7838825807566469,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.836,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7524
+ },
+ {
+ "epoch": 419.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.953526694599847,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22777114808559418,
+ "eval_pr_auc": 0.783922204343384,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2628,
+ "eval_samples_per_second": 620.269,
+ "eval_steps_per_second": 3.805,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7542
+ },
+ {
+ "epoch": 420.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535399113334307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22775039076805115,
+ "eval_pr_auc": 0.7839806097795337,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 608.883,
+ "eval_steps_per_second": 3.735,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7560
+ },
+ {
+ "epoch": 421.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535487289774859,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7671014953865734,
+ "eval_f1_macro": 0.8614024874814863,
+ "eval_loss": 0.22773513197898865,
+ "eval_pr_auc": 0.7840321361391863,
+ "eval_precision": 0.7569858712715856,
+ "eval_precision_macro": 0.8575622798085769,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7774911318929377,
+ "eval_recall_macro": 0.8653858749945765,
+ "eval_runtime": 0.2732,
+ "eval_samples_per_second": 596.635,
+ "eval_steps_per_second": 3.66,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7578
+ },
+ {
+ "epoch": 422.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9535455951128217,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668097281831188,
+ "eval_f1_macro": 0.8612078600062212,
+ "eval_loss": 0.22774243354797363,
+ "eval_pr_auc": 0.7840612126983214,
+ "eval_precision": 0.7561128526645768,
+ "eval_precision_macro": 0.8571497629022605,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865426391387735,
+ "eval_runtime": 0.1892,
+ "eval_samples_per_second": 861.347,
+ "eval_steps_per_second": 5.284,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7596
+ },
+ {
+ "epoch": 423.0,
+ "eval_accuracy": 0.9254118364856619,
+ "eval_auc": 0.9535476389376027,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7668097281831188,
+ "eval_f1_macro": 0.8612078600062212,
+ "eval_loss": 0.22774267196655273,
+ "eval_pr_auc": 0.7840628987467491,
+ "eval_precision": 0.7561128526645768,
+ "eval_precision_macro": 0.8571497629022605,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.865426391387735,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.36,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7614
+ },
+ {
+ "epoch": 424.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9535528458245448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7670057215511761,
+ "eval_f1_macro": 0.8613203162894483,
+ "eval_loss": 0.22773417830467224,
+ "eval_pr_auc": 0.7840834648119666,
+ "eval_precision": 0.756189282356628,
+ "eval_precision_macro": 0.8572170542389439,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8655876296979576,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.927,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7632
+ },
+ {
+ "epoch": 425.0,
+ "eval_accuracy": 0.9254626804962376,
+ "eval_auc": 0.9535637559558758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7669316375198728,
+ "eval_f1_macro": 0.8612846167990333,
+ "eval_loss": 0.22771182656288147,
+ "eval_pr_auc": 0.7841366739811415,
+ "eval_precision": 0.7563499529633114,
+ "eval_precision_macro": 0.857269581736829,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8654565718670011,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.859,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7650
+ },
+ {
+ "epoch": 426.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535682523703939,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7673235855054037,
+ "eval_f1_macro": 0.8615095109466251,
+ "eval_loss": 0.22770953178405762,
+ "eval_pr_auc": 0.7841596739317596,
+ "eval_precision": 0.7565026637417738,
+ "eval_precision_macro": 0.8574040902613707,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8657790484874461,
+ "eval_runtime": 0.2634,
+ "eval_samples_per_second": 618.83,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7668
+ },
+ {
+ "epoch": 427.0,
+ "eval_accuracy": 0.9255643685173887,
+ "eval_auc": 0.9535677852104438,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674714104193139,
+ "eval_f1_macro": 0.8615807415292696,
+ "eval_loss": 0.22770845890045166,
+ "eval_pr_auc": 0.7841620948509175,
+ "eval_precision": 0.7561815336463223,
+ "eval_precision_macro": 0.8572991684500658,
+ "eval_pred_class_0": 16473,
+ "eval_pred_class_1": 3195,
+ "eval_predicted_binding_ratio": 0.16244661378889566,
+ "eval_recall": 0.7791035149951628,
+ "eval_recall_macro": 0.866041164149359,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.948,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7686
+ },
+ {
+ "epoch": 428.0,
+ "eval_accuracy": 0.9256152125279642,
+ "eval_auc": 0.9535825980738566,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767445557145128,
+ "eval_f1_macro": 0.8615862980157476,
+ "eval_loss": 0.22767424583435059,
+ "eval_pr_auc": 0.7842346625186999,
+ "eval_precision": 0.7567398119122257,
+ "eval_precision_macro": 0.8575239295026598,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658092289667121,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.669,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7704
+ },
+ {
+ "epoch": 429.0,
+ "eval_accuracy": 0.9256152125279642,
+ "eval_auc": 0.9535961651774029,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767445557145128,
+ "eval_f1_macro": 0.8615862980157476,
+ "eval_loss": 0.22765418887138367,
+ "eval_pr_auc": 0.7843006481928805,
+ "eval_precision": 0.7567398119122257,
+ "eval_precision_macro": 0.8575239295026598,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658092289667121,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.28,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7722
+ },
+ {
+ "epoch": 430.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536132165155758,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22762420773506165,
+ "eval_pr_auc": 0.7844000578756268,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2691,
+ "eval_samples_per_second": 605.732,
+ "eval_steps_per_second": 3.716,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7740
+ },
+ {
+ "epoch": 431.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536164866352254,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22761479020118713,
+ "eval_pr_auc": 0.7844231309752159,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.783,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7758
+ },
+ {
+ "epoch": 432.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536227348995558,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22760987281799316,
+ "eval_pr_auc": 0.7844561314285999,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2144,
+ "eval_samples_per_second": 760.142,
+ "eval_steps_per_second": 4.663,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7776
+ },
+ {
+ "epoch": 433.0,
+ "eval_accuracy": 0.9256660565385397,
+ "eval_auc": 0.9536278736590051,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7674936386768448,
+ "eval_f1_macro": 0.8616274777746364,
+ "eval_loss": 0.22760248184204102,
+ "eval_pr_auc": 0.7844848515258783,
+ "eval_precision": 0.7571383746470034,
+ "eval_precision_macro": 0.8576966674521347,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657083516150216,
+ "eval_runtime": 0.2745,
+ "eval_samples_per_second": 593.759,
+ "eval_steps_per_second": 3.643,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7794
+ },
+ {
+ "epoch": 434.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536346669432771,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.22759221494197845,
+ "eval_pr_auc": 0.7845111553142384,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.216,
+ "eval_samples_per_second": 754.683,
+ "eval_steps_per_second": 4.63,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7812
+ },
+ {
+ "epoch": 435.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536369248830353,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.2275882065296173,
+ "eval_pr_auc": 0.7845220230482824,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.083,
+ "eval_steps_per_second": 3.816,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7830
+ },
+ {
+ "epoch": 436.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536390660328059,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7678855325914149,
+ "eval_f1_macro": 0.8618523468803471,
+ "eval_loss": 0.2275806963443756,
+ "eval_pr_auc": 0.7845372505398349,
+ "eval_precision": 0.7572906867356538,
+ "eval_precision_macro": 0.8578309735638339,
+ "eval_pred_class_0": 16479,
+ "eval_pred_class_1": 3189,
+ "eval_predicted_binding_ratio": 0.16214154972544234,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660308282354665,
+ "eval_runtime": 0.2506,
+ "eval_samples_per_second": 650.493,
+ "eval_steps_per_second": 3.991,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7848
+ },
+ {
+ "epoch": 437.0,
+ "eval_accuracy": 0.9257169005491153,
+ "eval_auc": 0.9536480004668485,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676896167912227,
+ "eval_f1_macro": 0.861739927468447,
+ "eval_loss": 0.2275666743516922,
+ "eval_pr_auc": 0.7845807540266186,
+ "eval_precision": 0.7572145545796738,
+ "eval_precision_macro": 0.8577638306878952,
+ "eval_pred_class_0": 16480,
+ "eval_pred_class_1": 3188,
+ "eval_predicted_binding_ratio": 0.16209070571486678,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8658695899252441,
+ "eval_runtime": 0.1853,
+ "eval_samples_per_second": 879.496,
+ "eval_steps_per_second": 5.396,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7866
+ },
+ {
+ "epoch": 438.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536552025160767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22755169868469238,
+ "eval_pr_auc": 0.7846223392518402,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2108,
+ "eval_samples_per_second": 773.198,
+ "eval_steps_per_second": 4.744,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7884
+ },
+ {
+ "epoch": 439.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.95366627809989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.2275334894657135,
+ "eval_pr_auc": 0.784667391259121,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.311,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7902
+ },
+ {
+ "epoch": 440.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536672708147835,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22753211855888367,
+ "eval_pr_auc": 0.7846795397266301,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2424,
+ "eval_samples_per_second": 672.466,
+ "eval_steps_per_second": 4.126,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7920
+ },
+ {
+ "epoch": 441.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536694119645541,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22753164172172546,
+ "eval_pr_auc": 0.7846730527925044,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.862,
+ "eval_steps_per_second": 3.778,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7938
+ },
+ {
+ "epoch": 442.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536803123633861,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7677378300986318,
+ "eval_f1_macro": 0.8617811692096791,
+ "eval_loss": 0.22751472890377045,
+ "eval_pr_auc": 0.7847227034562287,
+ "eval_precision": 0.7576138147566719,
+ "eval_precision_macro": 0.8579369201187351,
+ "eval_pred_class_0": 16483,
+ "eval_pred_class_1": 3185,
+ "eval_predicted_binding_ratio": 0.16193817368314012,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657687125735536,
+ "eval_runtime": 0.2667,
+ "eval_samples_per_second": 611.172,
+ "eval_steps_per_second": 3.75,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7956
+ },
+ {
+ "epoch": 443.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536880691650549,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.22749866545200348,
+ "eval_pr_auc": 0.7847641543874231,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.2405,
+ "eval_samples_per_second": 677.868,
+ "eval_steps_per_second": 4.159,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7974
+ },
+ {
+ "epoch": 444.0,
+ "eval_accuracy": 0.9257677445596909,
+ "eval_auc": 0.9536937626769448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7676639083386378,
+ "eval_f1_macro": 0.8617455448748739,
+ "eval_loss": 0.2274913638830185,
+ "eval_pr_auc": 0.7847876605630844,
+ "eval_precision": 0.7577756833176249,
+ "eval_precision_macro": 0.8579900557928737,
+ "eval_pred_class_0": 16485,
+ "eval_pred_class_1": 3183,
+ "eval_predicted_binding_ratio": 0.16183648566198902,
+ "eval_recall": 0.7778136085133828,
+ "eval_recall_macro": 0.8656376547425971,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.896,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7992
+ },
+ {
+ "epoch": 444.44444444444446,
+ "grad_norm": 19008.333984375,
+ "learning_rate": 3.72113927636733e-08,
+ "loss": 0.1854,
+ "step": 8000
+ },
+ {
+ "epoch": 445.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.953696935471605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748790681362152,
+ "eval_pr_auc": 0.7848011703648987,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.861,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8010
+ },
+ {
+ "epoch": 446.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9536995243163275,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748683393001556,
+ "eval_pr_auc": 0.7848132010793348,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.648,
+ "eval_steps_per_second": 4.047,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8028
+ },
+ {
+ "epoch": 447.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537016265361022,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.767859984089101,
+ "eval_f1_macro": 0.861858042633035,
+ "eval_loss": 0.22748340666294098,
+ "eval_pr_auc": 0.7848243277439235,
+ "eval_precision": 0.7578517587939698,
+ "eval_precision_macro": 0.8580571582128063,
+ "eval_pred_class_0": 16484,
+ "eval_pred_class_1": 3184,
+ "eval_predicted_binding_ratio": 0.16188732967256458,
+ "eval_recall": 0.7781360851338278,
+ "eval_recall_macro": 0.8657988930528197,
+ "eval_runtime": 0.2047,
+ "eval_samples_per_second": 796.254,
+ "eval_steps_per_second": 4.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8046
+ },
+ {
+ "epoch": 448.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537030474809498,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.2274865061044693,
+ "eval_pr_auc": 0.7848313481583303,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.69,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8064
+ },
+ {
+ "epoch": 449.0,
+ "eval_accuracy": 0.9258185885702664,
+ "eval_auc": 0.9537053443507039,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7679338317162399,
+ "eval_f1_macro": 0.8618936307360285,
+ "eval_loss": 0.22748111188411713,
+ "eval_pr_auc": 0.7848426067264029,
+ "eval_precision": 0.7576898932831136,
+ "eval_precision_macro": 0.8580040292771594,
+ "eval_pred_class_0": 16482,
+ "eval_pred_class_1": 3186,
+ "eval_predicted_binding_ratio": 0.16198901769371568,
+ "eval_recall": 0.7784585617542729,
+ "eval_recall_macro": 0.8659299508837761,
+ "eval_runtime": 0.2031,
+ "eval_samples_per_second": 802.733,
+ "eval_steps_per_second": 4.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8082
+ },
+ {
+ "epoch": 450.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537081765079003,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22747540473937988,
+ "eval_pr_auc": 0.7848573104950377,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2266,
+ "eval_samples_per_second": 719.273,
+ "eval_steps_per_second": 4.413,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8100
+ },
+ {
+ "epoch": 451.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537093638727732,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22747638821601868,
+ "eval_pr_auc": 0.7848599055730325,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.395,
+ "eval_steps_per_second": 3.966,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8118
+ },
+ {
+ "epoch": 452.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953713694834809,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746768593788147,
+ "eval_pr_auc": 0.7848753398216984,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2493,
+ "eval_samples_per_second": 653.726,
+ "eval_steps_per_second": 4.011,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8136
+ },
+ {
+ "epoch": 453.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537144345047297,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746726870536804,
+ "eval_pr_auc": 0.7848749928109487,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.248,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8154
+ },
+ {
+ "epoch": 454.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537162252845378,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7681297709923665,
+ "eval_f1_macro": 0.8620060619667715,
+ "eval_loss": 0.22746579349040985,
+ "eval_pr_auc": 0.7848822771158814,
+ "eval_precision": 0.7577659240665202,
+ "eval_precision_macro": 0.8580711180917517,
+ "eval_pred_class_0": 16481,
+ "eval_pred_class_1": 3187,
+ "eval_predicted_binding_ratio": 0.16203986170429124,
+ "eval_recall": 0.7787810383747178,
+ "eval_recall_macro": 0.8660911891939986,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.74,
+ "eval_steps_per_second": 4.06,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8172
+ },
+ {
+ "epoch": 455.0,
+ "eval_accuracy": 0.9260219646125687,
+ "eval_auc": 0.9537124685399404,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7687172150691464,
+ "eval_f1_macro": 0.8623431740348002,
+ "eval_loss": 0.22747564315795898,
+ "eval_pr_auc": 0.7848568499525364,
+ "eval_precision": 0.7579937304075235,
+ "eval_precision_macro": 0.8582722627034582,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665749041246662,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.446,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8190
+ },
+ {
+ "epoch": 456.0,
+ "eval_accuracy": 0.9260219646125687,
+ "eval_auc": 0.9537148724671828,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7687172150691464,
+ "eval_f1_macro": 0.8623431740348002,
+ "eval_loss": 0.22747227549552917,
+ "eval_pr_auc": 0.7848699531009984,
+ "eval_precision": 0.7579937304075235,
+ "eval_precision_macro": 0.8582722627034582,
+ "eval_pred_class_0": 16478,
+ "eval_pred_class_1": 3190,
+ "eval_predicted_binding_ratio": 0.1621923937360179,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665749041246662,
+ "eval_runtime": 0.2171,
+ "eval_samples_per_second": 750.821,
+ "eval_steps_per_second": 4.606,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8208
+ },
+ {
+ "epoch": 457.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537148043396901,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747254371643066,
+ "eval_pr_auc": 0.784871702941944,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2395,
+ "eval_samples_per_second": 680.611,
+ "eval_steps_per_second": 4.176,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8226
+ },
+ {
+ "epoch": 458.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537153006971368,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747036814689636,
+ "eval_pr_auc": 0.7848700532008381,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.347,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8244
+ },
+ {
+ "epoch": 459.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537154856146172,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.22747208178043365,
+ "eval_pr_auc": 0.7848674676035483,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2162,
+ "eval_samples_per_second": 753.78,
+ "eval_steps_per_second": 4.624,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8262
+ },
+ {
+ "epoch": 460.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537164783295108,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22747138142585754,
+ "eval_pr_auc": 0.7848698987106607,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.29,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8280
+ },
+ {
+ "epoch": 461.0,
+ "eval_accuracy": 0.925971120601993,
+ "eval_auc": 0.9537188335942584,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.768595041322314,
+ "eval_f1_macro": 0.8622662895753321,
+ "eval_loss": 0.2274673730134964,
+ "eval_pr_auc": 0.7848777447449568,
+ "eval_precision": 0.7577561892823567,
+ "eval_precision_macro": 0.858152234351077,
+ "eval_pred_class_0": 16477,
+ "eval_pred_class_1": 3191,
+ "eval_predicted_binding_ratio": 0.16224323774659344,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665447236454001,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.971,
+ "eval_steps_per_second": 3.896,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8298
+ },
+ {
+ "epoch": 462.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537204881190811,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22746412456035614,
+ "eval_pr_auc": 0.7848871874390749,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2183,
+ "eval_samples_per_second": 746.701,
+ "eval_steps_per_second": 4.581,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8316
+ },
+ {
+ "epoch": 463.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537223956888766,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274623066186905,
+ "eval_pr_auc": 0.784896188141376,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.736,
+ "eval_steps_per_second": 3.741,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8334
+ },
+ {
+ "epoch": 464.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537243227236701,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274596393108368,
+ "eval_pr_auc": 0.7849067717237345,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2647,
+ "eval_samples_per_second": 615.711,
+ "eval_steps_per_second": 3.777,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8352
+ },
+ {
+ "epoch": 465.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537252959735659,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22745810449123383,
+ "eval_pr_auc": 0.7849091375774293,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.717,
+ "eval_steps_per_second": 3.863,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8370
+ },
+ {
+ "epoch": 466.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537274565883342,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274550497531891,
+ "eval_pr_auc": 0.7849133909574995,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.839,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8388
+ },
+ {
+ "epoch": 467.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537294225531237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274521142244339,
+ "eval_pr_auc": 0.7849233363238722,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2621,
+ "eval_samples_per_second": 621.858,
+ "eval_steps_per_second": 3.815,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8406
+ },
+ {
+ "epoch": 468.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537300065030612,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.227451354265213,
+ "eval_pr_auc": 0.7849296985834374,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.266,
+ "eval_samples_per_second": 612.691,
+ "eval_steps_per_second": 3.759,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8424
+ },
+ {
+ "epoch": 469.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.953731164670437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744858264923096,
+ "eval_pr_auc": 0.7849332304496031,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1831,
+ "eval_samples_per_second": 890.399,
+ "eval_steps_per_second": 5.463,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8442
+ },
+ {
+ "epoch": 470.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537301622230444,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22745059430599213,
+ "eval_pr_auc": 0.7849302221742441,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.282,
+ "eval_samples_per_second": 577.963,
+ "eval_steps_per_second": 3.546,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8460
+ },
+ {
+ "epoch": 471.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.95373253695279,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274470180273056,
+ "eval_pr_auc": 0.7849404295185345,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.882,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8478
+ },
+ {
+ "epoch": 472.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537337243176629,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744475305080414,
+ "eval_pr_auc": 0.7849440080112278,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.737,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8496
+ },
+ {
+ "epoch": 472.22222222222223,
+ "grad_norm": 16415.080078125,
+ "learning_rate": 9.409753403698373e-09,
+ "loss": 0.185,
+ "step": 8500
+ },
+ {
+ "epoch": 473.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537344250575877,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744259238243103,
+ "eval_pr_auc": 0.7849494179086262,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 642.078,
+ "eval_steps_per_second": 3.939,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8514
+ },
+ {
+ "epoch": 474.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537351647275085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744259238243103,
+ "eval_pr_auc": 0.7849503915002546,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1794,
+ "eval_samples_per_second": 908.686,
+ "eval_steps_per_second": 5.575,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8532
+ },
+ {
+ "epoch": 475.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537357876074417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22744180262088776,
+ "eval_pr_auc": 0.7849530625634429,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.1819,
+ "eval_samples_per_second": 895.935,
+ "eval_steps_per_second": 5.497,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8550
+ },
+ {
+ "epoch": 476.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537367997873333,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274399697780609,
+ "eval_pr_auc": 0.7849568926584508,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 633.091,
+ "eval_steps_per_second": 3.884,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8568
+ },
+ {
+ "epoch": 477.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537370139023102,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.2274392694234848,
+ "eval_pr_auc": 0.784958754067368,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.802,
+ "eval_steps_per_second": 3.747,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8586
+ },
+ {
+ "epoch": 478.0,
+ "eval_accuracy": 0.9259202765914175,
+ "eval_auc": 0.9537376562472413,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7684729064039408,
+ "eval_f1_macro": 0.86218942357391,
+ "eval_loss": 0.22743819653987885,
+ "eval_pr_auc": 0.7849582401594454,
+ "eval_precision": 0.7575187969924813,
+ "eval_precision_macro": 0.85803228026366,
+ "eval_pred_class_0": 16476,
+ "eval_pred_class_1": 3192,
+ "eval_predicted_binding_ratio": 0.162294081757169,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8665145431661341,
+ "eval_runtime": 0.2266,
+ "eval_samples_per_second": 719.444,
+ "eval_steps_per_second": 4.414,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8604
+ },
+ {
+ "epoch": 479.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537377925022268,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743773460388184,
+ "eval_pr_auc": 0.7849574280187238,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.921,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8622
+ },
+ {
+ "epoch": 480.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537384932421518,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743669152259827,
+ "eval_pr_auc": 0.7849621773766102,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2648,
+ "eval_samples_per_second": 615.492,
+ "eval_steps_per_second": 3.776,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8640
+ },
+ {
+ "epoch": 481.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537382012671831,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743773460388184,
+ "eval_pr_auc": 0.7849637957606732,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.647,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8658
+ },
+ {
+ "epoch": 482.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537380066172038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.227437824010849,
+ "eval_pr_auc": 0.7849623241007161,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1901,
+ "eval_samples_per_second": 857.378,
+ "eval_steps_per_second": 5.26,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8676
+ },
+ {
+ "epoch": 483.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537382791271747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743763029575348,
+ "eval_pr_auc": 0.7849636397572854,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2674,
+ "eval_samples_per_second": 609.618,
+ "eval_steps_per_second": 3.74,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8694
+ },
+ {
+ "epoch": 484.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537387365546257,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743697464466095,
+ "eval_pr_auc": 0.7849651242159179,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.398,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8712
+ },
+ {
+ "epoch": 485.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537391745170787,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274360954761505,
+ "eval_pr_auc": 0.7849672499360805,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1892,
+ "eval_samples_per_second": 861.673,
+ "eval_steps_per_second": 5.286,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8730
+ },
+ {
+ "epoch": 486.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953739330237062,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743603587150574,
+ "eval_pr_auc": 0.784968007514094,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2358,
+ "eval_samples_per_second": 691.342,
+ "eval_steps_per_second": 4.241,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8748
+ },
+ {
+ "epoch": 487.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.95373934970206,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743603587150574,
+ "eval_pr_auc": 0.7849684807581643,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2101,
+ "eval_samples_per_second": 775.889,
+ "eval_steps_per_second": 4.76,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8766
+ },
+ {
+ "epoch": 488.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537393886320558,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274361550807953,
+ "eval_pr_auc": 0.7849697577570401,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.25,
+ "eval_samples_per_second": 652.0,
+ "eval_steps_per_second": 4.0,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8784
+ },
+ {
+ "epoch": 489.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537394859570454,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743597626686096,
+ "eval_pr_auc": 0.784969480914725,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.1914,
+ "eval_samples_per_second": 851.633,
+ "eval_steps_per_second": 5.225,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8802
+ },
+ {
+ "epoch": 490.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395540845381,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274358570575714,
+ "eval_pr_auc": 0.784970228255774,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2687,
+ "eval_samples_per_second": 606.685,
+ "eval_steps_per_second": 3.722,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8820
+ },
+ {
+ "epoch": 491.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395443520391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743581235408783,
+ "eval_pr_auc": 0.7849691356929127,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.272,
+ "eval_samples_per_second": 599.26,
+ "eval_steps_per_second": 3.676,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8838
+ },
+ {
+ "epoch": 492.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537396222120308,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743573784828186,
+ "eval_pr_auc": 0.7849709563979171,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2531,
+ "eval_samples_per_second": 644.069,
+ "eval_steps_per_second": 3.951,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8856
+ },
+ {
+ "epoch": 493.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953739583282035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849695249246844,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.242,
+ "eval_samples_per_second": 673.593,
+ "eval_steps_per_second": 4.132,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8874
+ },
+ {
+ "epoch": 494.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395443520391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849692821960631,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.934,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8892
+ },
+ {
+ "epoch": 495.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395248870412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849694151476083,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2644,
+ "eval_samples_per_second": 616.534,
+ "eval_steps_per_second": 3.782,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8910
+ },
+ {
+ "epoch": 496.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537396027470327,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849695246002066,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2501,
+ "eval_samples_per_second": 651.692,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8928
+ },
+ {
+ "epoch": 497.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.953739563817037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849694727996652,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2737,
+ "eval_samples_per_second": 595.465,
+ "eval_steps_per_second": 3.653,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8946
+ },
+ {
+ "epoch": 498.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395832820348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849694859174267,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.524,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8964
+ },
+ {
+ "epoch": 499.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537396027470328,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.2274356633424759,
+ "eval_pr_auc": 0.784969582259249,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.57,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8982
+ },
+ {
+ "epoch": 500.0,
+ "grad_norm": 39285.96484375,
+ "learning_rate": 3.760708838784765e-14,
+ "loss": 0.1855,
+ "step": 9000
+ },
+ {
+ "epoch": 500.0,
+ "eval_accuracy": 0.925869432580842,
+ "eval_auc": 0.9537395832820349,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7683508102955195,
+ "eval_f1_macro": 0.8621125760211936,
+ "eval_loss": 0.22743569314479828,
+ "eval_pr_auc": 0.7849695246002065,
+ "eval_precision": 0.7572815533980582,
+ "eval_precision_macro": 0.8579124003712597,
+ "eval_pred_class_0": 16475,
+ "eval_pred_class_1": 3193,
+ "eval_predicted_binding_ratio": 0.16234492576774456,
+ "eval_recall": 0.7797484682360529,
+ "eval_recall_macro": 0.8664843626868681,
+ "eval_runtime": 0.2662,
+ "eval_samples_per_second": 612.248,
+ "eval_steps_per_second": 3.756,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 9000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 500,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 6765506177901744.0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/training_args.bin b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8903358100d3be09ad49078090c6e572b3ddef68
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/checkpoint-9000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:499ba8a39afec206dd7194e2d216bf0be2633330bfcda3d90a12ddcbc04cdaca
+size 5368
diff --git a/finetune_glome_nano_site_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772419140.amax.1813661.0 b/finetune_glome_nano_site_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772419140.amax.1813661.0
new file mode 100644
index 0000000000000000000000000000000000000000..49f9d9e748c3e16965c5268987de8652f871cd9f
--- /dev/null
+++ b/finetune_glome_nano_site_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772419140.amax.1813661.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31b4f0499f3c83af64510e723eab0e1b174cbd078260871728cbc3136654212c
+size 582065
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/config.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/config.json
@@ -0,0 +1,52 @@
+{
+ "architectures": [
+ "GloMeModelForTokenClassification"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "cdr_weight": 0.0,
+ "class_weights": [
+ 0.1,
+ 0.9
+ ],
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.1,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "use_glome": true,
+ "use_nsa": true,
+ "vocab_size": 36
+}
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/model.safetensors b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8561eaa6ed751ddb203be5951d3ff87e5118e6b8
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7a6c9e7bebd86d828e33c602f601585df15ab364132986ff74a2c37ee1a20b5
+size 61385376
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/optimizer.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4b57b2fafe15e5b63edf036a2f9fd89ccaf3a622
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cedfd25f8179cb06ee2cdef74c23b5465b2feb98a5d90c0c18310779a53873fa
+size 122881658
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/rng_state.pth b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7a6cb59a2c728074b73428ae75c539651c4ea361
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b5e0b21ceddfc06d6dfc155153f6b1cb2682aebf418b0515e022b082bfcbcf5
+size 14244
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scaler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scheduler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e18d4033fb579341903417eb4d0a9c2d8e62330
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:231ac688ea3b75d272ed7ef26f577919491d334410210be20bdcc895a1659ccb
+size 1064
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/trainer_state.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..7f724e75ea9d17a6e15191f645e9cf4ee63d5836
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/trainer_state.json
@@ -0,0 +1,12041 @@
+{
+ "best_global_step": 12818,
+ "best_metric": 0.6982375574473259,
+ "best_model_checkpoint": "./results/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818",
+ "epoch": 493.0,
+ "eval_steps": 500,
+ "global_step": 12818,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.3402481187716087,
+ "eval_auc": 0.3906724936824889,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25902238465052535,
+ "eval_f1_macro": 0.3322238022959372,
+ "eval_loss": 1.061540961265564,
+ "eval_pr_auc": 0.12123677424188789,
+ "eval_precision": 0.15737977933523004,
+ "eval_precision_macro": 0.49946219326282143,
+ "eval_pred_class_0": 5257,
+ "eval_pred_class_1": 14411,
+ "eval_predicted_binding_ratio": 0.7327130364043116,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.49920692785748166,
+ "eval_runtime": 0.3106,
+ "eval_samples_per_second": 524.711,
+ "eval_steps_per_second": 3.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 26
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.34141753101484645,
+ "eval_auc": 0.39093619574173194,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25910884859577876,
+ "eval_f1_macro": 0.33318779271412513,
+ "eval_loss": 1.0595855712890625,
+ "eval_pr_auc": 0.12129083172780017,
+ "eval_precision": 0.15748852732582394,
+ "eval_precision_macro": 0.4996674570038125,
+ "eval_pred_class_0": 5286,
+ "eval_pred_class_1": 14382,
+ "eval_predicted_binding_ratio": 0.7312385600976204,
+ "eval_recall": 0.7304095453079652,
+ "eval_recall_macro": 0.4995079053877304,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.188,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 52
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.3436038234695953,
+ "eval_auc": 0.3913807276315981,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2594079853143644,
+ "eval_f1_macro": 0.3350089597864736,
+ "eval_loss": 1.0562976598739624,
+ "eval_pr_auc": 0.1213805792649038,
+ "eval_precision": 0.15776986951364175,
+ "eval_precision_macro": 0.5001890381857135,
+ "eval_pred_class_0": 5337,
+ "eval_pred_class_1": 14331,
+ "eval_predicted_binding_ratio": 0.7286455155582673,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.5002814346723429,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.877,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 78
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.345688427903193,
+ "eval_auc": 0.39204411422551294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25976416450963474,
+ "eval_f1_macro": 0.3367519287310599,
+ "eval_loss": 1.0516862869262695,
+ "eval_pr_auc": 0.1215177922821225,
+ "eval_precision": 0.15807896947633715,
+ "eval_precision_macro": 0.5007519661646174,
+ "eval_pred_class_0": 5384,
+ "eval_pred_class_1": 14284,
+ "eval_predicted_binding_ratio": 0.7262558470612162,
+ "eval_recall": 0.72815220896485,
+ "eval_recall_macro": 0.5011256608293798,
+ "eval_runtime": 0.2689,
+ "eval_samples_per_second": 606.218,
+ "eval_steps_per_second": 3.719,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 104
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.3489424445800285,
+ "eval_auc": 0.39286881698964193,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25986937171261776,
+ "eval_f1_macro": 0.33937421387990774,
+ "eval_loss": 1.0457645654678345,
+ "eval_pr_auc": 0.12168361829310792,
+ "eval_precision": 0.15830985915492957,
+ "eval_precision_macro": 0.5011556611063601,
+ "eval_pred_class_0": 5468,
+ "eval_pred_class_1": 14200,
+ "eval_predicted_binding_ratio": 0.7219849501728697,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.5017466331928395,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.89,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 130
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.3526540573520439,
+ "eval_auc": 0.3938679358675814,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2595952547103978,
+ "eval_f1_macro": 0.34226375201644554,
+ "eval_loss": 1.0385552644729614,
+ "eval_pr_auc": 0.12189495582289459,
+ "eval_precision": 0.15835402625044342,
+ "eval_precision_macro": 0.5012118238196412,
+ "eval_pred_class_0": 5573,
+ "eval_pred_class_1": 14095,
+ "eval_predicted_binding_ratio": 0.7166463290624364,
+ "eval_recall": 0.7197678168332796,
+ "eval_recall_macro": 0.5018528828839544,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.675,
+ "eval_steps_per_second": 3.728,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 156
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.3575859263778727,
+ "eval_auc": 0.39509779283079605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25933524825605253,
+ "eval_f1_macro": 0.34607916966826957,
+ "eval_loss": 1.0300335884094238,
+ "eval_pr_auc": 0.12215992714628282,
+ "eval_precision": 0.1584754262788365,
+ "eval_precision_macro": 0.5013918287261083,
+ "eval_pred_class_0": 5710,
+ "eval_pred_class_1": 13958,
+ "eval_predicted_binding_ratio": 0.7096806996135855,
+ "eval_recall": 0.7133182844243793,
+ "eval_recall_macro": 0.5021592327536275,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.797,
+ "eval_steps_per_second": 3.956,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 182
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.36261948342485256,
+ "eval_auc": 0.39656283563130934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2583126257247663,
+ "eval_f1_macro": 0.3497589695442054,
+ "eval_loss": 1.0202081203460693,
+ "eval_pr_auc": 0.12247236024679278,
+ "eval_precision": 0.1581769436997319,
+ "eval_precision_macro": 0.5008542806107318,
+ "eval_pred_class_0": 5867,
+ "eval_pred_class_1": 13801,
+ "eval_predicted_binding_ratio": 0.7016981899532235,
+ "eval_recall": 0.7039664624314738,
+ "eval_recall_macro": 0.5013464231032241,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.667,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 208
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.3690258287573724,
+ "eval_auc": 0.39822865015280895,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25848470363288717,
+ "eval_f1_macro": 0.3546848296925498,
+ "eval_loss": 1.0091010332107544,
+ "eval_pr_auc": 0.12282975659183427,
+ "eval_precision": 0.15863586358635864,
+ "eval_precision_macro": 0.5015788301853557,
+ "eval_pred_class_0": 6033,
+ "eval_pred_class_1": 13635,
+ "eval_predicted_binding_ratio": 0.6932580841976815,
+ "eval_recall": 0.6975169300225733,
+ "eval_recall_macro": 0.5025280068716114,
+ "eval_runtime": 0.2623,
+ "eval_samples_per_second": 621.417,
+ "eval_steps_per_second": 3.812,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 234
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.37553386211104334,
+ "eval_auc": 0.4001638991754374,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25788519637462237,
+ "eval_f1_macro": 0.35943500580602444,
+ "eval_loss": 0.9966734647750854,
+ "eval_pr_auc": 0.12325069957928089,
+ "eval_precision": 0.1586735073239646,
+ "eval_precision_macro": 0.5015911353953799,
+ "eval_pred_class_0": 6219,
+ "eval_pred_class_1": 13449,
+ "eval_predicted_binding_ratio": 0.6838010982306284,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.5025904311199223,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.918,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 260
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.38382143583485867,
+ "eval_auc": 0.4023744221985687,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25855001529519733,
+ "eval_f1_macro": 0.3657153538700335,
+ "eval_loss": 0.9828852415084839,
+ "eval_pr_auc": 0.12373084625745168,
+ "eval_precision": 0.15954394442766537,
+ "eval_precision_macro": 0.5028728439448414,
+ "eval_pred_class_0": 6424,
+ "eval_pred_class_1": 13244,
+ "eval_predicted_binding_ratio": 0.6733780760626398,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.5047576347901956,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.475,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 286
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.3912955053894651,
+ "eval_auc": 0.40482715792324586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2574122317330356,
+ "eval_f1_macro": 0.370844480646392,
+ "eval_loss": 0.9678097367286682,
+ "eval_pr_auc": 0.12427357405982056,
+ "eval_precision": 0.15935796021810922,
+ "eval_precision_macro": 0.5025013059703454,
+ "eval_pred_class_0": 6647,
+ "eval_pred_class_1": 13021,
+ "eval_predicted_binding_ratio": 0.6620398617042912,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.5042139676659523,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.751,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 312
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.4013117754728493,
+ "eval_auc": 0.40764224431659535,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2572383775941462,
+ "eval_f1_macro": 0.3779059068484294,
+ "eval_loss": 0.9513856172561646,
+ "eval_pr_auc": 0.12488748600523823,
+ "eval_precision": 0.15989648682559598,
+ "eval_precision_macro": 0.5031697587395765,
+ "eval_pred_class_0": 6916,
+ "eval_pred_class_1": 12752,
+ "eval_predicted_binding_ratio": 0.6483628228594671,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.5054414401669225,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.633,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 338
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.4099552572706935,
+ "eval_auc": 0.4108023769954713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25432114630855235,
+ "eval_f1_macro": 0.38308115532732967,
+ "eval_loss": 0.9335527420043945,
+ "eval_pr_auc": 0.12558462856716973,
+ "eval_precision": 0.15880276039159044,
+ "eval_precision_macro": 0.5015495900209409,
+ "eval_pred_class_0": 7206,
+ "eval_pred_class_1": 12462,
+ "eval_predicted_binding_ratio": 0.6336180597925565,
+ "eval_recall": 0.6381812318606901,
+ "eval_recall_macro": 0.5027086517847544,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.718,
+ "eval_steps_per_second": 3.728,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 364
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.41844620703681107,
+ "eval_auc": 0.4144857969457745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2516356974613975,
+ "eval_f1_macro": 0.3880413644466475,
+ "eval_loss": 0.9142351150512695,
+ "eval_pr_auc": 0.1264136402678906,
+ "eval_precision": 0.15784289583846342,
+ "eval_precision_macro": 0.5002307331563727,
+ "eval_pred_class_0": 7485,
+ "eval_pred_class_1": 12183,
+ "eval_predicted_binding_ratio": 0.6194325808419768,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.5004095532886144,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.136,
+ "eval_steps_per_second": 3.958,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 390
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.42897091722595077,
+ "eval_auc": 0.41858189431685716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24961582147390926,
+ "eval_f1_macro": 0.3943717007980979,
+ "eval_loss": 0.8934236168861389,
+ "eval_pr_auc": 0.12736412734017702,
+ "eval_precision": 0.15742457441429294,
+ "eval_precision_macro": 0.4996940867457263,
+ "eval_pred_class_0": 7802,
+ "eval_pred_class_1": 11866,
+ "eval_predicted_binding_ratio": 0.6033150294895261,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.4994487317940711,
+ "eval_runtime": 0.2416,
+ "eval_samples_per_second": 674.578,
+ "eval_steps_per_second": 4.139,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 416
+ },
+ {
+ "epoch": 17.0,
+ "eval_accuracy": 0.4378177750660972,
+ "eval_auc": 0.42318268015385996,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24520445081575534,
+ "eval_f1_macro": 0.3986584493314002,
+ "eval_loss": 0.8710600733757019,
+ "eval_pr_auc": 0.12844830521974454,
+ "eval_precision": 0.15552476619328023,
+ "eval_precision_macro": 0.4974052402394973,
+ "eval_pred_class_0": 8120,
+ "eval_pred_class_1": 11548,
+ "eval_predicted_binding_ratio": 0.5871466341264999,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.4952639713574891,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.27,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 442
+ },
+ {
+ "epoch": 18.0,
+ "eval_accuracy": 0.4492576774455969,
+ "eval_auc": 0.4283753771124365,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23911211014329867,
+ "eval_f1_macro": 0.4037791626413705,
+ "eval_loss": 0.8472632765769958,
+ "eval_pr_auc": 0.12969206942947384,
+ "eval_precision": 0.15285136955545578,
+ "eval_precision_macro": 0.4944498263457579,
+ "eval_pred_class_0": 8533,
+ "eval_pred_class_1": 11135,
+ "eval_predicted_binding_ratio": 0.5661480577587961,
+ "eval_recall": 0.5488552079974202,
+ "eval_recall_macro": 0.4897351430824307,
+ "eval_runtime": 0.281,
+ "eval_samples_per_second": 580.087,
+ "eval_steps_per_second": 3.559,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 468
+ },
+ {
+ "epoch": 19.0,
+ "eval_accuracy": 0.46339231238560097,
+ "eval_auc": 0.4341240430739382,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23588184187662903,
+ "eval_f1_macro": 0.41119432949496704,
+ "eval_loss": 0.8218646049499512,
+ "eval_pr_auc": 0.13110819406948146,
+ "eval_precision": 0.15208663990290355,
+ "eval_precision_macro": 0.4938729504080779,
+ "eval_pred_class_0": 8957,
+ "eval_pred_class_1": 10711,
+ "eval_predicted_binding_ratio": 0.544590197274761,
+ "eval_recall": 0.5253144147049339,
+ "eval_recall_macro": 0.4885580946585574,
+ "eval_runtime": 0.2447,
+ "eval_samples_per_second": 666.13,
+ "eval_steps_per_second": 4.087,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 494
+ },
+ {
+ "epoch": 19.23076923076923,
+ "grad_norm": 232728.109375,
+ "learning_rate": 3.8384615384615384e-07,
+ "loss": 0.99,
+ "step": 500
+ },
+ {
+ "epoch": 20.0,
+ "eval_accuracy": 0.4798657718120805,
+ "eval_auc": 0.44078220133048324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2309427153811457,
+ "eval_f1_macro": 0.4189975157915178,
+ "eval_loss": 0.7945711016654968,
+ "eval_pr_auc": 0.1327664236209388,
+ "eval_precision": 0.15057347318890305,
+ "eval_precision_macro": 0.49263119629657465,
+ "eval_pred_class_0": 9467,
+ "eval_pred_class_1": 10201,
+ "eval_predicted_binding_ratio": 0.5186597518812284,
+ "eval_recall": 0.49532408900354724,
+ "eval_recall_macro": 0.4861481916617905,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.734,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 520
+ },
+ {
+ "epoch": 21.0,
+ "eval_accuracy": 0.49964409192597115,
+ "eval_auc": 0.4482004774880778,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22407947646455886,
+ "eval_f1_macro": 0.42742637388305044,
+ "eval_loss": 0.765658974647522,
+ "eval_pr_auc": 0.1347110745909903,
+ "eval_precision": 0.14829889375913172,
+ "eval_precision_macro": 0.4908656872127009,
+ "eval_pred_class_0": 10086,
+ "eval_pred_class_1": 9582,
+ "eval_predicted_binding_ratio": 0.48718730933496035,
+ "eval_recall": 0.4582392776523702,
+ "eval_recall_macro": 0.48281674753627146,
+ "eval_runtime": 0.2494,
+ "eval_samples_per_second": 653.647,
+ "eval_steps_per_second": 4.01,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 546
+ },
+ {
+ "epoch": 22.0,
+ "eval_accuracy": 0.5294895261338214,
+ "eval_auc": 0.4563512312496838,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22025615099427032,
+ "eval_f1_macro": 0.44167751484473966,
+ "eval_loss": 0.7351489067077637,
+ "eval_pr_auc": 0.13684095333600696,
+ "eval_precision": 0.14908178396258698,
+ "eval_precision_macro": 0.49225486317659667,
+ "eval_pred_class_0": 10901,
+ "eval_pred_class_1": 8767,
+ "eval_predicted_binding_ratio": 0.4457494407158837,
+ "eval_recall": 0.4214769429216382,
+ "eval_recall_macro": 0.48559209613637894,
+ "eval_runtime": 0.2523,
+ "eval_samples_per_second": 645.993,
+ "eval_steps_per_second": 3.963,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 572
+ },
+ {
+ "epoch": 23.0,
+ "eval_accuracy": 0.564317673378076,
+ "eval_auc": 0.46539531162556536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21679919568595193,
+ "eval_f1_macro": 0.45751035677940843,
+ "eval_loss": 0.7033414244651794,
+ "eval_pr_auc": 0.1392772958743257,
+ "eval_precision": 0.15127551020408164,
+ "eval_precision_macro": 0.49468577674559844,
+ "eval_pred_class_0": 11828,
+ "eval_pred_class_1": 7840,
+ "eval_predicted_binding_ratio": 0.3986170429123449,
+ "eval_recall": 0.38245727184779105,
+ "eval_recall_macro": 0.49040772688786005,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.699,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 598
+ },
+ {
+ "epoch": 24.0,
+ "eval_accuracy": 0.6033150294895261,
+ "eval_auc": 0.4754341993823483,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21223747980613894,
+ "eval_f1_macro": 0.4735759293567254,
+ "eval_loss": 0.6706362962722778,
+ "eval_pr_auc": 0.14231930535250045,
+ "eval_precision": 0.1544906658826988,
+ "eval_precision_macro": 0.4975718001003078,
+ "eval_pred_class_0": 12865,
+ "eval_pred_class_1": 6803,
+ "eval_predicted_binding_ratio": 0.34589180394549524,
+ "eval_recall": 0.3389229280877136,
+ "eval_recall_macro": 0.49586334730576304,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.356,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 624
+ },
+ {
+ "epoch": 25.0,
+ "eval_accuracy": 0.6474984746796827,
+ "eval_auc": 0.4864891211002582,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2075665790376043,
+ "eval_f1_macro": 0.4904508280155492,
+ "eval_loss": 0.6374054551124573,
+ "eval_pr_auc": 0.14557281943245967,
+ "eval_precision": 0.16076487252124647,
+ "eval_precision_macro": 0.5021727358326632,
+ "eval_pred_class_0": 14020,
+ "eval_pred_class_1": 5648,
+ "eval_predicted_binding_ratio": 0.28716697173073014,
+ "eval_recall": 0.2928087713640761,
+ "eval_recall_macro": 0.5033489139611471,
+ "eval_runtime": 0.2341,
+ "eval_samples_per_second": 696.138,
+ "eval_steps_per_second": 4.271,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 650
+ },
+ {
+ "epoch": 26.0,
+ "eval_accuracy": 0.6925462680496237,
+ "eval_auc": 0.49869307137754393,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20360858685631503,
+ "eval_f1_macro": 0.5065549471155847,
+ "eval_loss": 0.6044979691505432,
+ "eval_pr_auc": 0.14935675594952297,
+ "eval_precision": 0.17208370436331255,
+ "eval_precision_macro": 0.5093417994668434,
+ "eval_pred_class_0": 15176,
+ "eval_pred_class_1": 4492,
+ "eval_predicted_binding_ratio": 0.22839129550538947,
+ "eval_recall": 0.2492744276039987,
+ "eval_recall_macro": 0.5123960114117054,
+ "eval_runtime": 0.2543,
+ "eval_samples_per_second": 640.94,
+ "eval_steps_per_second": 3.932,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 676
+ },
+ {
+ "epoch": 27.0,
+ "eval_accuracy": 0.7326113483831604,
+ "eval_auc": 0.5121322314924708,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1977116704805492,
+ "eval_f1_macro": 0.5186416868006297,
+ "eval_loss": 0.5727357268333435,
+ "eval_pr_auc": 0.15383837227298106,
+ "eval_precision": 0.18760856977417487,
+ "eval_precision_macro": 0.518159780138105,
+ "eval_pred_class_0": 16214,
+ "eval_pred_class_1": 3454,
+ "eval_predicted_binding_ratio": 0.1756152125279642,
+ "eval_recall": 0.20896485004837148,
+ "eval_recall_macro": 0.5197960002037596,
+ "eval_runtime": 0.2506,
+ "eval_samples_per_second": 650.309,
+ "eval_steps_per_second": 3.99,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 702
+ },
+ {
+ "epoch": 28.0,
+ "eval_accuracy": 0.7681513117754728,
+ "eval_auc": 0.5270188672472933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18947742623533595,
+ "eval_f1_macro": 0.5271029967130403,
+ "eval_loss": 0.5426873564720154,
+ "eval_pr_auc": 0.1589999639181036,
+ "eval_precision": 0.21108910891089108,
+ "eval_precision_macro": 0.5306451786169109,
+ "eval_pred_class_0": 17143,
+ "eval_pred_class_1": 2525,
+ "eval_predicted_binding_ratio": 0.12838112670327437,
+ "eval_recall": 0.17188003869719445,
+ "eval_recall_macro": 0.5258205046507038,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.973,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 728
+ },
+ {
+ "epoch": 29.0,
+ "eval_accuracy": 0.7986068741102298,
+ "eval_auc": 0.5437619187831915,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18076525336091004,
+ "eval_f1_macro": 0.5329784934669249,
+ "eval_loss": 0.5149086117744446,
+ "eval_pr_auc": 0.16564494894795073,
+ "eval_precision": 0.2520184544405998,
+ "eval_precision_macro": 0.5517368953367268,
+ "eval_pred_class_0": 17934,
+ "eval_pred_class_1": 1734,
+ "eval_predicted_binding_ratio": 0.08816351433801098,
+ "eval_recall": 0.14092228313447275,
+ "eval_recall_macro": 0.5313170599592205,
+ "eval_runtime": 0.258,
+ "eval_samples_per_second": 631.786,
+ "eval_steps_per_second": 3.876,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 754
+ },
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.8241814114297336,
+ "eval_auc": 0.5629712926123112,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17548879351454458,
+ "eval_f1_macro": 0.5385440097559633,
+ "eval_loss": 0.4897482395172119,
+ "eval_pr_auc": 0.17432371223202417,
+ "eval_precision": 0.3366880146386093,
+ "eval_precision_macro": 0.5947773855158054,
+ "eval_pred_class_0": 18575,
+ "eval_pred_class_1": 1093,
+ "eval_predicted_binding_ratio": 0.05557250355908074,
+ "eval_recall": 0.11867139632376653,
+ "eval_recall_macro": 0.5374548506940254,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.589,
+ "eval_steps_per_second": 3.899,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 780
+ },
+ {
+ "epoch": 31.0,
+ "eval_accuracy": 0.8372991661582265,
+ "eval_auc": 0.5843234707368185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16883116883116883,
+ "eval_f1_macro": 0.5393273806169032,
+ "eval_loss": 0.46770623326301575,
+ "eval_pr_auc": 0.18537280435724188,
+ "eval_precision": 0.43391188251001334,
+ "eval_precision_macro": 0.6435905413924347,
+ "eval_pred_class_0": 18919,
+ "eval_pred_class_1": 749,
+ "eval_predicted_binding_ratio": 0.038082163921090095,
+ "eval_recall": 0.10480490164463076,
+ "eval_recall_macro": 0.5396059276135269,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.108,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 806
+ },
+ {
+ "epoch": 32.0,
+ "eval_accuracy": 0.8421293471629042,
+ "eval_auc": 0.6080693861773249,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16012983500135244,
+ "eval_f1_macro": 0.5365030891665479,
+ "eval_loss": 0.44841739535331726,
+ "eval_pr_auc": 0.1997259509611161,
+ "eval_precision": 0.4966442953020134,
+ "eval_precision_macro": 0.6747850251677853,
+ "eval_pred_class_0": 19072,
+ "eval_pred_class_1": 596,
+ "eval_predicted_binding_ratio": 0.030303030303030304,
+ "eval_recall": 0.09545307965172525,
+ "eval_recall_macro": 0.5386723960460594,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.915,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 832
+ },
+ {
+ "epoch": 33.0,
+ "eval_accuracy": 0.8450782997762863,
+ "eval_auc": 0.6341019717128032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16497670594683475,
+ "eval_f1_macro": 0.5397977373430758,
+ "eval_loss": 0.4312308728694916,
+ "eval_pr_auc": 0.2181951536640109,
+ "eval_precision": 0.5492700729927007,
+ "eval_precision_macro": 0.7014132791741746,
+ "eval_pred_class_0": 19120,
+ "eval_pred_class_1": 548,
+ "eval_predicted_binding_ratio": 0.0278625177954037,
+ "eval_recall": 0.09706546275395034,
+ "eval_recall_macro": 0.5410781529982704,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.04,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 858
+ },
+ {
+ "epoch": 34.0,
+ "eval_accuracy": 0.8478238763473663,
+ "eval_auc": 0.6614343616815009,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17932547299149987,
+ "eval_f1_macro": 0.5477310488609045,
+ "eval_loss": 0.41584137082099915,
+ "eval_pr_auc": 0.24110190314317137,
+ "eval_precision": 0.5989010989010989,
+ "eval_precision_macro": 0.7269162957114008,
+ "eval_pred_class_0": 19122,
+ "eval_pred_class_1": 546,
+ "eval_predicted_binding_ratio": 0.027760829774252593,
+ "eval_recall": 0.1054498548855208,
+ "eval_recall_macro": 0.546115402483504,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.016,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 884
+ },
+ {
+ "epoch": 35.0,
+ "eval_accuracy": 0.8509253609924751,
+ "eval_auc": 0.6891114086357669,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20585048754062837,
+ "eval_f1_macro": 0.5617963020129356,
+ "eval_loss": 0.4015716016292572,
+ "eval_pr_auc": 0.2683830744239665,
+ "eval_precision": 0.6429780033840947,
+ "eval_precision_macro": 0.7501727569994856,
+ "eval_pred_class_0": 19077,
+ "eval_pred_class_1": 591,
+ "eval_predicted_binding_ratio": 0.030048810250152533,
+ "eval_recall": 0.12254111576910674,
+ "eval_recall_macro": 0.5549024767594251,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.872,
+ "eval_steps_per_second": 4.005,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 910
+ },
+ {
+ "epoch": 36.0,
+ "eval_accuracy": 0.8537217815741306,
+ "eval_auc": 0.7168296727231165,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2366675510745556,
+ "eval_f1_macro": 0.5778889812054533,
+ "eval_loss": 0.3880736827850342,
+ "eval_pr_auc": 0.2994175694348318,
+ "eval_precision": 0.6676646706586826,
+ "eval_precision_macro": 0.7639639142767097,
+ "eval_pred_class_0": 19000,
+ "eval_pred_class_1": 668,
+ "eval_predicted_binding_ratio": 0.0339637990644702,
+ "eval_recall": 0.1438245727184779,
+ "eval_recall_macro": 0.5652122199621845,
+ "eval_runtime": 0.2693,
+ "eval_samples_per_second": 605.227,
+ "eval_steps_per_second": 3.713,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 936
+ },
+ {
+ "epoch": 37.0,
+ "eval_accuracy": 0.8565690461663616,
+ "eval_auc": 0.743181046261935,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2768520892078954,
+ "eval_f1_macro": 0.59862076733571,
+ "eval_loss": 0.37574923038482666,
+ "eval_pr_auc": 0.33279138215623166,
+ "eval_precision": 0.675,
+ "eval_precision_macro": 0.7696337714649142,
+ "eval_pred_class_0": 18868,
+ "eval_pred_class_1": 800,
+ "eval_predicted_binding_ratio": 0.04067520846044336,
+ "eval_recall": 0.17413737504030957,
+ "eval_recall_macro": 0.5792217629109919,
+ "eval_runtime": 0.2774,
+ "eval_samples_per_second": 587.664,
+ "eval_steps_per_second": 3.605,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 962
+ },
+ {
+ "epoch": 38.0,
+ "eval_accuracy": 0.8596705308114704,
+ "eval_auc": 0.7678773986205973,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3205317577548006,
+ "eval_f1_macro": 0.6211435791665652,
+ "eval_loss": 0.36427780985832214,
+ "eval_pr_auc": 0.3671211589285648,
+ "eval_precision": 0.6774193548387096,
+ "eval_precision_macro": 0.7732261685723991,
+ "eval_pred_class_0": 18707,
+ "eval_pred_class_1": 961,
+ "eval_predicted_binding_ratio": 0.048861094163107584,
+ "eval_recall": 0.20993227990970656,
+ "eval_recall_macro": 0.5956101913823899,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.252,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 988
+ },
+ {
+ "epoch": 38.46153846153846,
+ "grad_norm": 35024.03515625,
+ "learning_rate": 7.684615384615384e-07,
+ "loss": 0.5725,
+ "step": 1000
+ },
+ {
+ "epoch": 39.0,
+ "eval_accuracy": 0.8642464917632703,
+ "eval_auc": 0.7904617013805764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3723554301833568,
+ "eval_f1_macro": 0.6481240123381296,
+ "eval_loss": 0.35397008061408997,
+ "eval_pr_auc": 0.40223746916130343,
+ "eval_precision": 0.6869037294015612,
+ "eval_precision_macro": 0.7810970172797707,
+ "eval_pred_class_0": 18515,
+ "eval_pred_class_1": 1153,
+ "eval_predicted_binding_ratio": 0.058623144193613995,
+ "eval_recall": 0.25540148339245405,
+ "eval_recall_macro": 0.6168055886811972,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.77,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1014
+ },
+ {
+ "epoch": 40.0,
+ "eval_accuracy": 0.867246288387228,
+ "eval_auc": 0.8102996097248453,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4125984251968504,
+ "eval_f1_macro": 0.6688826868467987,
+ "eval_loss": 0.3446972072124481,
+ "eval_pr_auc": 0.43559149314237056,
+ "eval_precision": 0.6822916666666666,
+ "eval_precision_macro": 0.7815518582187295,
+ "eval_pred_class_0": 18324,
+ "eval_pred_class_1": 1344,
+ "eval_predicted_binding_ratio": 0.06833435021354485,
+ "eval_recall": 0.29571106094808125,
+ "eval_recall_macro": 0.634968465827454,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.748,
+ "eval_steps_per_second": 3.949,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1040
+ },
+ {
+ "epoch": 41.0,
+ "eval_accuracy": 0.8715171852755745,
+ "eval_auc": 0.8272611461298317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4583065380493033,
+ "eval_f1_macro": 0.6927107089600444,
+ "eval_loss": 0.33654505014419556,
+ "eval_pr_auc": 0.4645782536288223,
+ "eval_precision": 0.6835038363171355,
+ "eval_precision_macro": 0.7856317237263981,
+ "eval_pred_class_0": 18104,
+ "eval_pred_class_1": 1564,
+ "eval_predicted_binding_ratio": 0.07952003254016676,
+ "eval_recall": 0.344727507255724,
+ "eval_recall_macro": 0.6574244163911867,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.484,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1066
+ },
+ {
+ "epoch": 42.0,
+ "eval_accuracy": 0.8743136058572301,
+ "eval_auc": 0.8416796876148132,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4898885678910442,
+ "eval_f1_macro": 0.7091078096051335,
+ "eval_loss": 0.329649955034256,
+ "eval_pr_auc": 0.49067495219464874,
+ "eval_precision": 0.6802292263610316,
+ "eval_precision_macro": 0.7867195342316791,
+ "eval_pred_class_0": 17923,
+ "eval_pred_class_1": 1745,
+ "eval_predicted_binding_ratio": 0.08872279845434208,
+ "eval_recall": 0.38277974846823604,
+ "eval_recall_macro": 0.674549166803684,
+ "eval_runtime": 0.2641,
+ "eval_samples_per_second": 617.276,
+ "eval_steps_per_second": 3.787,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1092
+ },
+ {
+ "epoch": 43.0,
+ "eval_accuracy": 0.8758897701850722,
+ "eval_auc": 0.8534597097025247,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5176842521240862,
+ "eval_f1_macro": 0.7232330815689724,
+ "eval_loss": 0.32387641072273254,
+ "eval_pr_auc": 0.5115876936649595,
+ "eval_precision": 0.6683673469387755,
+ "eval_precision_macro": 0.7836133097919538,
+ "eval_pred_class_0": 17708,
+ "eval_pred_class_1": 1960,
+ "eval_predicted_binding_ratio": 0.09965426072808622,
+ "eval_recall": 0.42244437278297325,
+ "eval_recall_macro": 0.6916048748685797,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.429,
+ "eval_steps_per_second": 3.8,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1118
+ },
+ {
+ "epoch": 44.0,
+ "eval_accuracy": 0.878991254830181,
+ "eval_auc": 0.863260959032272,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5468392993145469,
+ "eval_f1_macro": 0.7385059071387897,
+ "eval_loss": 0.3189404308795929,
+ "eval_pr_auc": 0.5291286431025274,
+ "eval_precision": 0.6675964667596467,
+ "eval_precision_macro": 0.7862729722049646,
+ "eval_pred_class_0": 17517,
+ "eval_pred_class_1": 2151,
+ "eval_predicted_binding_ratio": 0.10936546674801709,
+ "eval_recall": 0.4630764269590455,
+ "eval_recall_macro": 0.7099591708043251,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 630.993,
+ "eval_steps_per_second": 3.871,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1144
+ },
+ {
+ "epoch": 45.0,
+ "eval_accuracy": 0.8797030709782387,
+ "eval_auc": 0.8710211865407248,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5603864734299517,
+ "eval_f1_macro": 0.7453519808982827,
+ "eval_loss": 0.3149340748786926,
+ "eval_pr_auc": 0.5420378923897758,
+ "eval_precision": 0.6611135466900482,
+ "eval_precision_macro": 0.7847466853482449,
+ "eval_pred_class_0": 17387,
+ "eval_pred_class_1": 2281,
+ "eval_predicted_binding_ratio": 0.11597518812283913,
+ "eval_recall": 0.48629474363108677,
+ "eval_recall_macro": 0.719817861342917,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.753,
+ "eval_steps_per_second": 3.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1170
+ },
+ {
+ "epoch": 46.0,
+ "eval_accuracy": 0.8811775472849298,
+ "eval_auc": 0.8772876506442417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5748590140076405,
+ "eval_f1_macro": 0.7528983447354317,
+ "eval_loss": 0.3115498721599579,
+ "eval_pr_auc": 0.5526462799402374,
+ "eval_precision": 0.659432387312187,
+ "eval_precision_macro": 0.7856853923591968,
+ "eval_pred_class_0": 17272,
+ "eval_pred_class_1": 2396,
+ "eval_predicted_binding_ratio": 0.12182224933902787,
+ "eval_recall": 0.509513060303128,
+ "eval_recall_macro": 0.7301292590704993,
+ "eval_runtime": 0.247,
+ "eval_samples_per_second": 659.808,
+ "eval_steps_per_second": 4.048,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1196
+ },
+ {
+ "epoch": 47.0,
+ "eval_accuracy": 0.8817368314012609,
+ "eval_auc": 0.8824923380415335,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5840486409155937,
+ "eval_f1_macro": 0.7575589340187262,
+ "eval_loss": 0.3087034523487091,
+ "eval_pr_auc": 0.5616002050007283,
+ "eval_precision": 0.6555600160578081,
+ "eval_precision_macro": 0.7850484483851945,
+ "eval_pred_class_0": 17177,
+ "eval_pred_class_1": 2491,
+ "eval_predicted_binding_ratio": 0.12665243034370552,
+ "eval_recall": 0.526604321186714,
+ "eval_recall_macro": 0.7374073093831197,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.602,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1222
+ },
+ {
+ "epoch": 48.0,
+ "eval_accuracy": 0.8833638397396787,
+ "eval_auc": 0.8867719903429474,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5951288386869044,
+ "eval_f1_macro": 0.7634984852775182,
+ "eval_loss": 0.30617523193359375,
+ "eval_pr_auc": 0.5687331552143856,
+ "eval_precision": 0.6573099415204678,
+ "eval_precision_macro": 0.7872879591248483,
+ "eval_pred_class_0": 17103,
+ "eval_pred_class_1": 2565,
+ "eval_predicted_binding_ratio": 0.13041488712629654,
+ "eval_recall": 0.5436955820702999,
+ "eval_recall_macro": 0.7453191497603264,
+ "eval_runtime": 0.2573,
+ "eval_samples_per_second": 633.489,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1248
+ },
+ {
+ "epoch": 49.0,
+ "eval_accuracy": 0.8840248118771609,
+ "eval_auc": 0.8901050792607902,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6022667829119442,
+ "eval_f1_macro": 0.7671909492667516,
+ "eval_loss": 0.3041446805000305,
+ "eval_pr_auc": 0.5742293420515451,
+ "eval_precision": 0.6556567957479119,
+ "eval_precision_macro": 0.7874972953730754,
+ "eval_pred_class_0": 17034,
+ "eval_pred_class_1": 2634,
+ "eval_predicted_binding_ratio": 0.13392312385600977,
+ "eval_recall": 0.5569171235085456,
+ "eval_recall_macro": 0.751084867060001,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.934,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1274
+ },
+ {
+ "epoch": 50.0,
+ "eval_accuracy": 0.8846349400040675,
+ "eval_auc": 0.8931467576948593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6055970797844603,
+ "eval_f1_macro": 0.7690165668701654,
+ "eval_loss": 0.30221912264823914,
+ "eval_pr_auc": 0.5797467982851593,
+ "eval_precision": 0.6568627450980392,
+ "eval_precision_macro": 0.7884983683177079,
+ "eval_pred_class_0": 17016,
+ "eval_pred_class_1": 2652,
+ "eval_predicted_binding_ratio": 0.13483831604636973,
+ "eval_recall": 0.561754272815221,
+ "eval_recall_macro": 0.7534129002755408,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.805,
+ "eval_steps_per_second": 3.993,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1300
+ },
+ {
+ "epoch": 51.0,
+ "eval_accuracy": 0.8854484441732764,
+ "eval_auc": 0.8956789398085232,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6095997227516895,
+ "eval_f1_macro": 0.7712381155096151,
+ "eval_loss": 0.30062082409858704,
+ "eval_pr_auc": 0.5844826815319759,
+ "eval_precision": 0.6588014981273408,
+ "eval_precision_macro": 0.7899255166833903,
+ "eval_pred_class_0": 16998,
+ "eval_pred_class_1": 2670,
+ "eval_predicted_binding_ratio": 0.13575350823672971,
+ "eval_recall": 0.5672363753627861,
+ "eval_recall_macro": 0.7561237710700572,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.92,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1326
+ },
+ {
+ "epoch": 52.0,
+ "eval_accuracy": 0.8860077282896075,
+ "eval_auc": 0.8977014114868052,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6157010627356874,
+ "eval_f1_macro": 0.774389842453749,
+ "eval_loss": 0.2989857792854309,
+ "eval_pr_auc": 0.5879586440077966,
+ "eval_precision": 0.6571533113794366,
+ "eval_precision_macro": 0.7900469834133675,
+ "eval_pred_class_0": 16935,
+ "eval_pred_class_1": 2733,
+ "eval_predicted_binding_ratio": 0.13895668090298963,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.7613048960873738,
+ "eval_runtime": 0.2404,
+ "eval_samples_per_second": 677.963,
+ "eval_steps_per_second": 4.159,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1352
+ },
+ {
+ "epoch": 53.0,
+ "eval_accuracy": 0.8864144803742119,
+ "eval_auc": 0.8993954307902827,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6165465156196361,
+ "eval_f1_macro": 0.7749399244764847,
+ "eval_loss": 0.29775407910346985,
+ "eval_pr_auc": 0.5914083972949268,
+ "eval_precision": 0.6590825688073394,
+ "eval_precision_macro": 0.7910298047365505,
+ "eval_pred_class_0": 16943,
+ "eval_pred_class_1": 2725,
+ "eval_predicted_binding_ratio": 0.13854992881838518,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.7615463399215019,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.94,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1378
+ },
+ {
+ "epoch": 54.0,
+ "eval_accuracy": 0.8866178564165141,
+ "eval_auc": 0.9007296980023092,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6186730506155951,
+ "eval_f1_macro": 0.7760410164688105,
+ "eval_loss": 0.296587198972702,
+ "eval_pr_auc": 0.59415963293408,
+ "eval_precision": 0.6585365853658537,
+ "eval_precision_macro": 0.7910908800004612,
+ "eval_pred_class_0": 16921,
+ "eval_pred_class_1": 2747,
+ "eval_predicted_binding_ratio": 0.1396684970510474,
+ "eval_recall": 0.583360206385037,
+ "eval_recall_macro": 0.7633708136410005,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.694,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1404
+ },
+ {
+ "epoch": 55.0,
+ "eval_accuracy": 0.8872279845434208,
+ "eval_auc": 0.9019074471661075,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6215017064846416,
+ "eval_f1_macro": 0.7776226419864958,
+ "eval_loss": 0.2955063581466675,
+ "eval_pr_auc": 0.5967231989416606,
+ "eval_precision": 0.6600217470097861,
+ "eval_precision_macro": 0.7921612076464745,
+ "eval_pred_class_0": 16909,
+ "eval_pred_class_1": 2759,
+ "eval_predicted_binding_ratio": 0.14027862517795403,
+ "eval_recall": 0.5872299258303773,
+ "eval_recall_macro": 0.7653056733636705,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.741,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1430
+ },
+ {
+ "epoch": 56.0,
+ "eval_accuracy": 0.8881431767337807,
+ "eval_auc": 0.9030401348597343,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6235455167693361,
+ "eval_f1_macro": 0.778929094226063,
+ "eval_loss": 0.29450690746307373,
+ "eval_pr_auc": 0.5995879576354929,
+ "eval_precision": 0.6642362376959533,
+ "eval_precision_macro": 0.7943337761596458,
+ "eval_pred_class_0": 16925,
+ "eval_pred_class_1": 2743,
+ "eval_predicted_binding_ratio": 0.13946512100874517,
+ "eval_recall": 0.5875524024508223,
+ "eval_recall_macro": 0.7659799798214153,
+ "eval_runtime": 0.2658,
+ "eval_samples_per_second": 613.135,
+ "eval_steps_per_second": 3.762,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1456
+ },
+ {
+ "epoch": 57.0,
+ "eval_accuracy": 0.8882957087655075,
+ "eval_auc": 0.9040587382005859,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6243802359377671,
+ "eval_f1_macro": 0.7793863433697854,
+ "eval_loss": 0.2936408519744873,
+ "eval_pr_auc": 0.6024898616264603,
+ "eval_precision": 0.6644832605531296,
+ "eval_precision_macro": 0.7945643253120258,
+ "eval_pred_class_0": 16920,
+ "eval_pred_class_1": 2748,
+ "eval_predicted_binding_ratio": 0.13971934106162295,
+ "eval_recall": 0.5888423089326024,
+ "eval_recall_macro": 0.7665947525830392,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.369,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1482
+ },
+ {
+ "epoch": 57.69230769230769,
+ "grad_norm": 15613.5302734375,
+ "learning_rate": 9.992863736980368e-07,
+ "loss": 0.3115,
+ "step": 1500
+ },
+ {
+ "epoch": 58.0,
+ "eval_accuracy": 0.887888956680903,
+ "eval_auc": 0.9048886089216611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6246808510638298,
+ "eval_f1_macro": 0.7793916194591735,
+ "eval_loss": 0.29281434416770935,
+ "eval_pr_auc": 0.603713292882509,
+ "eval_precision": 0.6614996395097332,
+ "eval_precision_macro": 0.7932808958765667,
+ "eval_pred_class_0": 16894,
+ "eval_pred_class_1": 2774,
+ "eval_predicted_binding_ratio": 0.14104128533658736,
+ "eval_recall": 0.5917445985166075,
+ "eval_recall_macro": 0.7675328292275196,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.428,
+ "eval_steps_per_second": 3.972,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1508
+ },
+ {
+ "epoch": 59.0,
+ "eval_accuracy": 0.887888956680903,
+ "eval_auc": 0.9056672866982218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6251912289648138,
+ "eval_f1_macro": 0.7796389289833485,
+ "eval_loss": 0.2920655906200409,
+ "eval_pr_auc": 0.6054694565410179,
+ "eval_precision": 0.6610352264557872,
+ "eval_precision_macro": 0.7931493791878604,
+ "eval_pred_class_0": 16886,
+ "eval_pred_class_1": 2782,
+ "eval_predicted_binding_ratio": 0.14144803742119177,
+ "eval_recall": 0.5930345049983876,
+ "eval_recall_macro": 0.7680570605513457,
+ "eval_runtime": 0.235,
+ "eval_samples_per_second": 693.644,
+ "eval_steps_per_second": 4.255,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1534
+ },
+ {
+ "epoch": 60.0,
+ "eval_accuracy": 0.8882957087655075,
+ "eval_auc": 0.9063294664622661,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6254049445865303,
+ "eval_f1_macro": 0.7798830166450921,
+ "eval_loss": 0.29136955738067627,
+ "eval_pr_auc": 0.6071731602747702,
+ "eval_precision": 0.6635311143270622,
+ "eval_precision_macro": 0.7942892202018652,
+ "eval_pred_class_0": 16904,
+ "eval_pred_class_1": 2764,
+ "eval_predicted_binding_ratio": 0.14053284523083182,
+ "eval_recall": 0.5914221218961625,
+ "eval_recall_macro": 0.7676432152306912,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.702,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1560
+ },
+ {
+ "epoch": 61.0,
+ "eval_accuracy": 0.8886007728289608,
+ "eval_auc": 0.9070085321120007,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6269368295589988,
+ "eval_f1_macro": 0.7807307642400976,
+ "eval_loss": 0.290680855512619,
+ "eval_pr_auc": 0.6088679721523397,
+ "eval_precision": 0.6641414141414141,
+ "eval_precision_macro": 0.7947837752525253,
+ "eval_pred_class_0": 16896,
+ "eval_pred_class_1": 2772,
+ "eval_predicted_binding_ratio": 0.14093959731543623,
+ "eval_recall": 0.5936794582392777,
+ "eval_recall_macro": 0.7687417029229828,
+ "eval_runtime": 0.2486,
+ "eval_samples_per_second": 655.642,
+ "eval_steps_per_second": 4.022,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1586
+ },
+ {
+ "epoch": 62.0,
+ "eval_accuracy": 0.8887533048606874,
+ "eval_auc": 0.9076136113046634,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6273841961852861,
+ "eval_f1_macro": 0.7810002501366307,
+ "eval_loss": 0.29004454612731934,
+ "eval_pr_auc": 0.6100991712198425,
+ "eval_precision": 0.664741970407795,
+ "eval_precision_macro": 0.7951158511564335,
+ "eval_pred_class_0": 16897,
+ "eval_pred_class_1": 2771,
+ "eval_predicted_binding_ratio": 0.1408887533048607,
+ "eval_recall": 0.5940019348597226,
+ "eval_recall_macro": 0.7689633021917374,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.871,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1612
+ },
+ {
+ "epoch": 63.0,
+ "eval_accuracy": 0.8888041488712629,
+ "eval_auc": 0.9081136281710841,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6286296484971982,
+ "eval_f1_macro": 0.781621309135136,
+ "eval_loss": 0.28950682282447815,
+ "eval_pr_auc": 0.6111782063777282,
+ "eval_precision": 0.6639167862266858,
+ "eval_precision_macro": 0.794932326762632,
+ "eval_pred_class_0": 16880,
+ "eval_pred_class_1": 2788,
+ "eval_predicted_binding_ratio": 0.14175310148464512,
+ "eval_recall": 0.5969042244437278,
+ "eval_recall_macro": 0.7701730031496119,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.907,
+ "eval_steps_per_second": 4.018,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1638
+ },
+ {
+ "epoch": 64.0,
+ "eval_accuracy": 0.889363432987594,
+ "eval_auc": 0.9086206913667498,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6299319727891156,
+ "eval_f1_macro": 0.7824456611913058,
+ "eval_loss": 0.2888965606689453,
+ "eval_pr_auc": 0.6126297306007413,
+ "eval_precision": 0.6664267722202231,
+ "eval_precision_macro": 0.7962366556938643,
+ "eval_pred_class_0": 16889,
+ "eval_pred_class_1": 2779,
+ "eval_predicted_binding_ratio": 0.14129550538946511,
+ "eval_recall": 0.5972267010641729,
+ "eval_recall_macro": 0.7706360462524945,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.362,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1664
+ },
+ {
+ "epoch": 65.0,
+ "eval_accuracy": 0.8900244051250763,
+ "eval_auc": 0.9091278518874051,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6305721605465414,
+ "eval_f1_macro": 0.782984177701663,
+ "eval_loss": 0.2884848117828369,
+ "eval_pr_auc": 0.6142560104629078,
+ "eval_precision": 0.6702977487291213,
+ "eval_precision_macro": 0.7980494301171916,
+ "eval_pred_class_0": 16914,
+ "eval_pred_class_1": 2754,
+ "eval_predicted_binding_ratio": 0.14002440512507627,
+ "eval_recall": 0.5952918413415027,
+ "eval_recall_macro": 0.7702420454972136,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 614.992,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1690
+ },
+ {
+ "epoch": 66.0,
+ "eval_accuracy": 0.8905836892414074,
+ "eval_auc": 0.9094545718773954,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6327645051194539,
+ "eval_f1_macro": 0.78423982216183,
+ "eval_loss": 0.2880232632160187,
+ "eval_pr_auc": 0.6147358252333397,
+ "eval_precision": 0.6719826023921711,
+ "eval_precision_macro": 0.7991174470355793,
+ "eval_pred_class_0": 16909,
+ "eval_pred_class_1": 2759,
+ "eval_predicted_binding_ratio": 0.14027862517795403,
+ "eval_recall": 0.5978716543050628,
+ "eval_recall_macro": 0.7716224934167917,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.665,
+ "eval_steps_per_second": 3.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1716
+ },
+ {
+ "epoch": 67.0,
+ "eval_accuracy": 0.8908887533048607,
+ "eval_auc": 0.9099424231201196,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6360244233378561,
+ "eval_f1_macro": 0.7859248910947654,
+ "eval_loss": 0.28752708435058594,
+ "eval_pr_auc": 0.6159928290925853,
+ "eval_precision": 0.6708407871198568,
+ "eval_precision_macro": 0.7990901618287602,
+ "eval_pred_class_0": 16873,
+ "eval_pred_class_1": 2795,
+ "eval_predicted_binding_ratio": 0.14210900955867398,
+ "eval_recall": 0.6046436633344082,
+ "eval_recall_macro": 0.7745557907424743,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.527,
+ "eval_steps_per_second": 4.046,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1742
+ },
+ {
+ "epoch": 68.0,
+ "eval_accuracy": 0.8910412853365873,
+ "eval_auc": 0.910346516476819,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6362247496180614,
+ "eval_f1_macro": 0.7860747010162366,
+ "eval_loss": 0.2870826721191406,
+ "eval_pr_auc": 0.6168347475575285,
+ "eval_precision": 0.6716845878136201,
+ "eval_precision_macro": 0.7994932004123201,
+ "eval_pred_class_0": 16878,
+ "eval_pred_class_1": 2790,
+ "eval_predicted_binding_ratio": 0.14185478950579622,
+ "eval_recall": 0.6043211867139633,
+ "eval_recall_macro": 0.7745152743493158,
+ "eval_runtime": 0.266,
+ "eval_samples_per_second": 612.866,
+ "eval_steps_per_second": 3.76,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1768
+ },
+ {
+ "epoch": 69.0,
+ "eval_accuracy": 0.8910921293471629,
+ "eval_auc": 0.9107640601470772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6352179836512262,
+ "eval_f1_macro": 0.7856044496310159,
+ "eval_loss": 0.28665614128112793,
+ "eval_pr_auc": 0.6181373929491851,
+ "eval_precision": 0.673042223024179,
+ "eval_precision_macro": 0.7999465716529429,
+ "eval_pred_class_0": 16897,
+ "eval_pred_class_1": 2771,
+ "eval_predicted_binding_ratio": 0.1408887533048607,
+ "eval_recall": 0.6014188971299581,
+ "eval_recall_macro": 0.7733659343499732,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.359,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1794
+ },
+ {
+ "epoch": 70.0,
+ "eval_accuracy": 0.8910412853365873,
+ "eval_auc": 0.9111344401273891,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6354822248681748,
+ "eval_f1_macro": 0.7857149295725039,
+ "eval_loss": 0.28624698519706726,
+ "eval_pr_auc": 0.6190938884927122,
+ "eval_precision": 0.6724262059035278,
+ "eval_precision_macro": 0.7997122148522967,
+ "eval_pred_class_0": 16890,
+ "eval_pred_class_1": 2778,
+ "eval_predicted_binding_ratio": 0.14124466137888958,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.7737289273635768,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.866,
+ "eval_steps_per_second": 3.742,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1820
+ },
+ {
+ "epoch": 71.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9114138601724477,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.63642541624193,
+ "eval_f1_macro": 0.7862246662674524,
+ "eval_loss": 0.2858646512031555,
+ "eval_pr_auc": 0.6197061363545492,
+ "eval_precision": 0.6725314183123878,
+ "eval_precision_macro": 0.7998977650704271,
+ "eval_pred_class_0": 16883,
+ "eval_pred_class_1": 2785,
+ "eval_predicted_binding_ratio": 0.14160056945291843,
+ "eval_recall": 0.6039987100935182,
+ "eval_recall_macro": 0.7744747579561573,
+ "eval_runtime": 0.2386,
+ "eval_samples_per_second": 683.037,
+ "eval_steps_per_second": 4.19,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1846
+ },
+ {
+ "epoch": 72.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9118766988928523,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6373514431239389,
+ "eval_f1_macro": 0.7867436519572335,
+ "eval_loss": 0.2853938341140747,
+ "eval_pr_auc": 0.6212208808374569,
+ "eval_precision": 0.6730010756543564,
+ "eval_precision_macro": 0.8002424656665053,
+ "eval_pred_class_0": 16879,
+ "eval_pred_class_1": 2789,
+ "eval_predicted_binding_ratio": 0.14180394549522066,
+ "eval_recall": 0.6052886165752983,
+ "eval_recall_macro": 0.7751197111970474,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.683,
+ "eval_steps_per_second": 4.016,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1872
+ },
+ {
+ "epoch": 73.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9121406831945651,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.637411047102677,
+ "eval_f1_macro": 0.7867021736679862,
+ "eval_loss": 0.2850610911846161,
+ "eval_pr_auc": 0.6219405042066507,
+ "eval_precision": 0.6715458764726884,
+ "eval_precision_macro": 0.79960764506032,
+ "eval_pred_class_0": 16867,
+ "eval_pred_class_1": 2801,
+ "eval_predicted_binding_ratio": 0.14241407362212732,
+ "eval_recall": 0.6065785230570784,
+ "eval_recall_macro": 0.7755232206038093,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 618.95,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1898
+ },
+ {
+ "epoch": 74.0,
+ "eval_accuracy": 0.8912446613788896,
+ "eval_auc": 0.9124869655074592,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.637026981164093,
+ "eval_f1_macro": 0.7865337040796394,
+ "eval_loss": 0.284681111574173,
+ "eval_pr_auc": 0.6229948438184316,
+ "eval_precision": 0.6722779369627507,
+ "eval_precision_macro": 0.7998744508231626,
+ "eval_pred_class_0": 16876,
+ "eval_pred_class_1": 2792,
+ "eval_predicted_binding_ratio": 0.14195647752694732,
+ "eval_recall": 0.6052886165752983,
+ "eval_recall_macro": 0.7750291697592493,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.745,
+ "eval_steps_per_second": 3.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1924
+ },
+ {
+ "epoch": 75.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9128360118500571,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6385786802030456,
+ "eval_f1_macro": 0.7873381643700563,
+ "eval_loss": 0.2842992842197418,
+ "eval_pr_auc": 0.6238239183047751,
+ "eval_precision": 0.6717693129227483,
+ "eval_precision_macro": 0.7998801484834395,
+ "eval_pred_class_0": 16859,
+ "eval_pred_class_1": 2809,
+ "eval_predicted_binding_ratio": 0.14282082570673174,
+ "eval_recall": 0.6085133827797484,
+ "eval_recall_macro": 0.7764302895066123,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.282,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1950
+ },
+ {
+ "epoch": 76.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9131794425407568,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.638700947225981,
+ "eval_f1_macro": 0.7873973860112672,
+ "eval_loss": 0.2839708924293518,
+ "eval_pr_auc": 0.6248797725776689,
+ "eval_precision": 0.671647100675916,
+ "eval_precision_macro": 0.7998444318708524,
+ "eval_pred_class_0": 16857,
+ "eval_pred_class_1": 2811,
+ "eval_predicted_binding_ratio": 0.14292251372788287,
+ "eval_recall": 0.6088358594001935,
+ "eval_recall_macro": 0.7765613473375688,
+ "eval_runtime": 0.2644,
+ "eval_samples_per_second": 616.603,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1976
+ },
+ {
+ "epoch": 76.92307692307692,
+ "grad_norm": 18483.060546875,
+ "learning_rate": 9.912189372587507e-07,
+ "loss": 0.2796,
+ "step": 2000
+ },
+ {
+ "epoch": 77.0,
+ "eval_accuracy": 0.891651413463494,
+ "eval_auc": 0.9134005357195656,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6389971201084195,
+ "eval_f1_macro": 0.7876288504858192,
+ "eval_loss": 0.28368592262268066,
+ "eval_pr_auc": 0.6256253637409228,
+ "eval_precision": 0.6730906495360457,
+ "eval_precision_macro": 0.8005261145225586,
+ "eval_pred_class_0": 16866,
+ "eval_pred_class_1": 2802,
+ "eval_predicted_binding_ratio": 0.14246491763270286,
+ "eval_recall": 0.6081909061593035,
+ "eval_recall_macro": 0.7764501340719858,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.408,
+ "eval_steps_per_second": 3.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2002
+ },
+ {
+ "epoch": 78.0,
+ "eval_accuracy": 0.8918547895057962,
+ "eval_auc": 0.9135784263355038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6399187404773997,
+ "eval_f1_macro": 0.788145675542478,
+ "eval_loss": 0.283357173204422,
+ "eval_pr_auc": 0.6259773419133142,
+ "eval_precision": 0.6735566642908054,
+ "eval_precision_macro": 0.8008691873227245,
+ "eval_pred_class_0": 16862,
+ "eval_pred_class_1": 2806,
+ "eval_predicted_binding_ratio": 0.14266829367500508,
+ "eval_recall": 0.6094808126410836,
+ "eval_recall_macro": 0.7770950873128759,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.804,
+ "eval_steps_per_second": 4.152,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2028
+ },
+ {
+ "epoch": 79.0,
+ "eval_accuracy": 0.8920073215375229,
+ "eval_auc": 0.9139080660751812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6396335256192739,
+ "eval_f1_macro": 0.788060288914535,
+ "eval_loss": 0.28298139572143555,
+ "eval_pr_auc": 0.6270064177031266,
+ "eval_precision": 0.6749015395631937,
+ "eval_precision_macro": 0.8014211401519672,
+ "eval_pred_class_0": 16875,
+ "eval_pred_class_1": 2793,
+ "eval_predicted_binding_ratio": 0.14200732153752288,
+ "eval_recall": 0.6078684295388584,
+ "eval_recall_macro": 0.7765303395958915,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.262,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2054
+ },
+ {
+ "epoch": 80.0,
+ "eval_accuracy": 0.8923123856009763,
+ "eval_auc": 0.9140997087121456,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6410169491525424,
+ "eval_f1_macro": 0.7888360257187523,
+ "eval_loss": 0.28268861770629883,
+ "eval_pr_auc": 0.6271691682976167,
+ "eval_precision": 0.6755984280100036,
+ "eval_precision_macro": 0.8019346102940528,
+ "eval_pred_class_0": 16869,
+ "eval_pred_class_1": 2799,
+ "eval_predicted_binding_ratio": 0.1423123856009762,
+ "eval_recall": 0.6098032892615285,
+ "eval_recall_macro": 0.7774977694572265,
+ "eval_runtime": 0.2121,
+ "eval_samples_per_second": 768.576,
+ "eval_steps_per_second": 4.715,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2080
+ },
+ {
+ "epoch": 81.0,
+ "eval_accuracy": 0.8924140736221273,
+ "eval_auc": 0.9143275951752264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6415989159891599,
+ "eval_f1_macro": 0.7891531311221224,
+ "eval_loss": 0.28239867091178894,
+ "eval_pr_auc": 0.6278526459152028,
+ "eval_precision": 0.6757046022119158,
+ "eval_precision_macro": 0.8020681327098713,
+ "eval_pred_class_0": 16865,
+ "eval_pred_class_1": 2803,
+ "eval_predicted_binding_ratio": 0.14251576164327842,
+ "eval_recall": 0.6107707191228636,
+ "eval_recall_macro": 0.7779513039086281,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 617.069,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2106
+ },
+ {
+ "epoch": 82.0,
+ "eval_accuracy": 0.8925666056538539,
+ "eval_auc": 0.9145931950717662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.641317263622475,
+ "eval_f1_macro": 0.7890694555517069,
+ "eval_loss": 0.2821619510650635,
+ "eval_pr_auc": 0.6287354538303637,
+ "eval_precision": 0.6770609318996416,
+ "eval_precision_macro": 0.8026257379014738,
+ "eval_pred_class_0": 16878,
+ "eval_pred_class_1": 2790,
+ "eval_predicted_binding_ratio": 0.14185478950579622,
+ "eval_recall": 0.6091583360206385,
+ "eval_recall_macro": 0.7773865561916435,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.835,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2132
+ },
+ {
+ "epoch": 83.0,
+ "eval_accuracy": 0.8929733577384584,
+ "eval_auc": 0.9148203126674294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6442453946256549,
+ "eval_f1_macro": 0.7906286370477088,
+ "eval_loss": 0.2817782461643219,
+ "eval_pr_auc": 0.6293872239214393,
+ "eval_precision": 0.6768465909090909,
+ "eval_precision_macro": 0.8029675631972466,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.6146404385682038,
+ "eval_recall_macro": 0.7798559831520322,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.502,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2158
+ },
+ {
+ "epoch": 84.0,
+ "eval_accuracy": 0.8929225137278829,
+ "eval_auc": 0.9150136584917113,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6435341909275558,
+ "eval_f1_macro": 0.7902665569930348,
+ "eval_loss": 0.2815438210964203,
+ "eval_pr_auc": 0.6300492382313454,
+ "eval_precision": 0.677235482721767,
+ "eval_precision_macro": 0.8030326633702543,
+ "eval_pred_class_0": 16861,
+ "eval_pred_class_1": 2807,
+ "eval_predicted_binding_ratio": 0.14271913768558064,
+ "eval_recall": 0.6130280554659787,
+ "eval_recall_macro": 0.7791705135179836,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.685,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2184
+ },
+ {
+ "epoch": 85.0,
+ "eval_accuracy": 0.8928716697173072,
+ "eval_auc": 0.9151760160393141,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6437869822485207,
+ "eval_f1_macro": 0.7903713942390684,
+ "eval_loss": 0.28126296401023865,
+ "eval_pr_auc": 0.6304146488380505,
+ "eval_precision": 0.6766169154228856,
+ "eval_precision_macro": 0.8027975997548746,
+ "eval_pred_class_0": 16854,
+ "eval_pred_class_1": 2814,
+ "eval_predicted_binding_ratio": 0.14307504575960953,
+ "eval_recall": 0.6139954853273137,
+ "eval_recall_macro": 0.7795335065315872,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.417,
+ "eval_steps_per_second": 3.917,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2210
+ },
+ {
+ "epoch": 86.0,
+ "eval_accuracy": 0.8930750457596095,
+ "eval_auc": 0.9154795142867925,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.643861134631668,
+ "eval_f1_macro": 0.7904777241463208,
+ "eval_loss": 0.2809857428073883,
+ "eval_pr_auc": 0.6313964494387146,
+ "eval_precision": 0.677960057061341,
+ "eval_precision_macro": 0.803401280902587,
+ "eval_pred_class_0": 16864,
+ "eval_pred_class_1": 2804,
+ "eval_predicted_binding_ratio": 0.14256660565385398,
+ "eval_recall": 0.6130280554659787,
+ "eval_recall_macro": 0.7792610549557817,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.65,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2236
+ },
+ {
+ "epoch": 87.0,
+ "eval_accuracy": 0.8930750457596095,
+ "eval_auc": 0.9156233995513745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6458999831621485,
+ "eval_f1_macro": 0.7914651276711422,
+ "eval_loss": 0.28072693943977356,
+ "eval_pr_auc": 0.631527672626228,
+ "eval_precision": 0.6758280479210712,
+ "eval_precision_macro": 0.8027684505796682,
+ "eval_pred_class_0": 16830,
+ "eval_pred_class_1": 2838,
+ "eval_predicted_binding_ratio": 0.14429530201342283,
+ "eval_recall": 0.618510158013544,
+ "eval_recall_macro": 0.7814890380820421,
+ "eval_runtime": 0.2629,
+ "eval_samples_per_second": 620.026,
+ "eval_steps_per_second": 3.804,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2262
+ },
+ {
+ "epoch": 88.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9158623713307676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6449103821440649,
+ "eval_f1_macro": 0.7910387587819241,
+ "eval_loss": 0.28049618005752563,
+ "eval_pr_auc": 0.6324229662687507,
+ "eval_precision": 0.6779239246356203,
+ "eval_precision_macro": 0.8035422055690709,
+ "eval_pred_class_0": 16855,
+ "eval_pred_class_1": 2813,
+ "eval_predicted_binding_ratio": 0.14302420174903396,
+ "eval_recall": 0.6149629151886489,
+ "eval_recall_macro": 0.7801379433793187,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.715,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2288
+ },
+ {
+ "epoch": 89.0,
+ "eval_accuracy": 0.893125889770185,
+ "eval_auc": 0.9160425393514616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6452919338508268,
+ "eval_f1_macro": 0.7911883195144587,
+ "eval_loss": 0.28025364875793457,
+ "eval_pr_auc": 0.6329798450144843,
+ "eval_precision": 0.6768141592920354,
+ "eval_precision_macro": 0.8031105172758937,
+ "eval_pred_class_0": 16843,
+ "eval_pred_class_1": 2825,
+ "eval_predicted_binding_ratio": 0.1436343298759406,
+ "eval_recall": 0.6165752982908739,
+ "eval_recall_macro": 0.7807328715755691,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.097,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2314
+ },
+ {
+ "epoch": 90.0,
+ "eval_accuracy": 0.8936343298759406,
+ "eval_auc": 0.9161711835226769,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6463826910074375,
+ "eval_f1_macro": 0.7918927219250234,
+ "eval_loss": 0.2800801396369934,
+ "eval_pr_auc": 0.6332605675535015,
+ "eval_precision": 0.6792184724689165,
+ "eval_precision_macro": 0.8043336176502299,
+ "eval_pred_class_0": 16853,
+ "eval_pred_class_1": 2815,
+ "eval_predicted_binding_ratio": 0.14312588977018506,
+ "eval_recall": 0.6165752982908739,
+ "eval_recall_macro": 0.7810346763682292,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.634,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2340
+ },
+ {
+ "epoch": 91.0,
+ "eval_accuracy": 0.8934309538336384,
+ "eval_auc": 0.9163414730569294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6463044211947351,
+ "eval_f1_macro": 0.7917843566614202,
+ "eval_loss": 0.2798333764076233,
+ "eval_pr_auc": 0.633657166273441,
+ "eval_precision": 0.6778761061946903,
+ "eval_precision_macro": 0.8037305484960271,
+ "eval_pred_class_0": 16843,
+ "eval_pred_class_1": 2825,
+ "eval_predicted_binding_ratio": 0.1436343298759406,
+ "eval_recall": 0.617542728152209,
+ "eval_recall_macro": 0.7813071279440347,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.372,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2366
+ },
+ {
+ "epoch": 92.0,
+ "eval_accuracy": 0.8937868619076673,
+ "eval_auc": 0.9165959292421633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6469494676356261,
+ "eval_f1_macro": 0.7922200583338069,
+ "eval_loss": 0.27958908677101135,
+ "eval_pr_auc": 0.6345494466448222,
+ "eval_precision": 0.6796875,
+ "eval_precision_macro": 0.8046253782933777,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.617220251531764,
+ "eval_recall_macro": 0.7813873334679403,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.29,
+ "eval_steps_per_second": 3.855,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2392
+ },
+ {
+ "epoch": 93.0,
+ "eval_accuracy": 0.8938885499288184,
+ "eval_auc": 0.9168139566838005,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6482386650935446,
+ "eval_f1_macro": 0.7928796235385993,
+ "eval_loss": 0.27935075759887695,
+ "eval_pr_auc": 0.6350396647674293,
+ "eval_precision": 0.6790254237288136,
+ "eval_precision_macro": 0.8045281549625298,
+ "eval_pred_class_0": 16836,
+ "eval_pred_class_1": 2832,
+ "eval_predicted_binding_ratio": 0.14399023794996949,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.7826272149050808,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.307,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2418
+ },
+ {
+ "epoch": 94.0,
+ "eval_accuracy": 0.8941936139922717,
+ "eval_auc": 0.916949978089225,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6492499578628013,
+ "eval_f1_macro": 0.7934750822155368,
+ "eval_loss": 0.27906060218811035,
+ "eval_pr_auc": 0.6354743744677446,
+ "eval_precision": 0.6800847457627118,
+ "eval_precision_macro": 0.8051469107763429,
+ "eval_pred_class_0": 16836,
+ "eval_pred_class_1": 2832,
+ "eval_predicted_binding_ratio": 0.14399023794996949,
+ "eval_recall": 0.6210899709771042,
+ "eval_recall_macro": 0.7832014712735463,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.231,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2444
+ },
+ {
+ "epoch": 95.0,
+ "eval_accuracy": 0.894498678055725,
+ "eval_auc": 0.9171692902207247,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6496707749451291,
+ "eval_f1_macro": 0.7937845988573549,
+ "eval_loss": 0.2788851261138916,
+ "eval_pr_auc": 0.6362118552671664,
+ "eval_precision": 0.6817859673990078,
+ "eval_precision_macro": 0.8059588747122072,
+ "eval_pred_class_0": 16846,
+ "eval_pred_class_1": 2822,
+ "eval_predicted_binding_ratio": 0.14348179784421394,
+ "eval_recall": 0.6204450177362141,
+ "eval_recall_macro": 0.7831204384872295,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.948,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2470
+ },
+ {
+ "epoch": 96.0,
+ "eval_accuracy": 0.8946003660768761,
+ "eval_auc": 0.9172832383185145,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.64977192093259,
+ "eval_f1_macro": 0.7938688135051675,
+ "eval_loss": 0.27873092889785767,
+ "eval_pr_auc": 0.6365172336600542,
+ "eval_precision": 0.6823988644428672,
+ "eval_precision_macro": 0.8062439426071903,
+ "eval_pred_class_0": 16850,
+ "eval_pred_class_1": 2818,
+ "eval_predicted_binding_ratio": 0.14327842180191175,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.783049741614805,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 617.058,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2496
+ },
+ {
+ "epoch": 96.15384615384616,
+ "grad_norm": 12855.328125,
+ "learning_rate": 9.74310718484651e-07,
+ "loss": 0.268,
+ "step": 2500
+ },
+ {
+ "epoch": 97.0,
+ "eval_accuracy": 0.8948037421191783,
+ "eval_auc": 0.9174495472606937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6506837751139625,
+ "eval_f1_macro": 0.7943808843546348,
+ "eval_loss": 0.27859047055244446,
+ "eval_pr_auc": 0.636938752781715,
+ "eval_precision": 0.6828490432317506,
+ "eval_precision_macro": 0.8065794545376371,
+ "eval_pred_class_0": 16846,
+ "eval_pred_class_1": 2822,
+ "eval_predicted_binding_ratio": 0.14348179784421394,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.783694694855695,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.744,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2522
+ },
+ {
+ "epoch": 98.0,
+ "eval_accuracy": 0.8951596501932072,
+ "eval_auc": 0.9175816853990344,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6521592442645074,
+ "eval_f1_macro": 0.7952187504847441,
+ "eval_loss": 0.2782803475856781,
+ "eval_pr_auc": 0.6372336473067074,
+ "eval_precision": 0.6837637071100107,
+ "eval_precision_macro": 0.8072045778587877,
+ "eval_pred_class_0": 16841,
+ "eval_pred_class_1": 2827,
+ "eval_predicted_binding_ratio": 0.14373601789709173,
+ "eval_recall": 0.6233473073202193,
+ "eval_recall_macro": 0.784692305196296,
+ "eval_runtime": 0.2192,
+ "eval_samples_per_second": 743.723,
+ "eval_steps_per_second": 4.563,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2548
+ },
+ {
+ "epoch": 99.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.917689375499995,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6515867656988521,
+ "eval_f1_macro": 0.7949062764205981,
+ "eval_loss": 0.2782030701637268,
+ "eval_pr_auc": 0.6376582660543189,
+ "eval_precision": 0.683669854764435,
+ "eval_precision_macro": 0.8070768389286704,
+ "eval_pred_class_0": 16845,
+ "eval_pred_class_1": 2823,
+ "eval_predicted_binding_ratio": 0.1435326418547895,
+ "eval_recall": 0.6223798774588842,
+ "eval_recall_macro": 0.7842387707448946,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.364,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2574
+ },
+ {
+ "epoch": 100.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.917862049496492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6513513513513514,
+ "eval_f1_macro": 0.7947922665303561,
+ "eval_loss": 0.2779688835144043,
+ "eval_pr_auc": 0.6381115995039711,
+ "eval_precision": 0.6839304717985101,
+ "eval_precision_macro": 0.8071560484103832,
+ "eval_pred_class_0": 16849,
+ "eval_pred_class_1": 2819,
+ "eval_predicted_binding_ratio": 0.14332926581248728,
+ "eval_recall": 0.6217349242179941,
+ "eval_recall_macro": 0.7839766550829814,
+ "eval_runtime": 0.219,
+ "eval_samples_per_second": 744.395,
+ "eval_steps_per_second": 4.567,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2600
+ },
+ {
+ "epoch": 101.0,
+ "eval_accuracy": 0.8951088061826317,
+ "eval_auc": 0.9180189763096767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6513435862768294,
+ "eval_f1_macro": 0.7948061179237165,
+ "eval_loss": 0.27778077125549316,
+ "eval_pr_auc": 0.6385730633658938,
+ "eval_precision": 0.6843039772727273,
+ "eval_precision_macro": 0.8073193278245905,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.783875777731291,
+ "eval_runtime": 0.2529,
+ "eval_samples_per_second": 644.591,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2626
+ },
+ {
+ "epoch": 102.0,
+ "eval_accuracy": 0.895413870246085,
+ "eval_auc": 0.9182058500221522,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6518869521069555,
+ "eval_f1_macro": 0.7951749356520059,
+ "eval_loss": 0.2776651084423065,
+ "eval_pr_auc": 0.6390385071928153,
+ "eval_precision": 0.6858974358974359,
+ "eval_precision_macro": 0.8081029290993704,
+ "eval_pred_class_0": 16860,
+ "eval_pred_class_1": 2808,
+ "eval_predicted_binding_ratio": 0.1427699816961562,
+ "eval_recall": 0.6210899709771042,
+ "eval_recall_macro": 0.7839258027759306,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.358,
+ "eval_steps_per_second": 3.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2652
+ },
+ {
+ "epoch": 103.0,
+ "eval_accuracy": 0.8952104942037828,
+ "eval_auc": 0.9183596527031714,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6527379949452401,
+ "eval_f1_macro": 0.7955166277830898,
+ "eval_loss": 0.2773846685886383,
+ "eval_pr_auc": 0.6394513455183966,
+ "eval_precision": 0.6834862385321101,
+ "eval_precision_macro": 0.8071702310636076,
+ "eval_pred_class_0": 16834,
+ "eval_pred_class_1": 2834,
+ "eval_predicted_binding_ratio": 0.1440919259711206,
+ "eval_recall": 0.6246372138019993,
+ "eval_recall_macro": 0.785246716999388,
+ "eval_runtime": 0.2264,
+ "eval_samples_per_second": 719.926,
+ "eval_steps_per_second": 4.417,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2678
+ },
+ {
+ "epoch": 104.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9185293680199856,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6535353535353535,
+ "eval_f1_macro": 0.7959556034654849,
+ "eval_loss": 0.2772791385650635,
+ "eval_pr_auc": 0.6398373166129732,
+ "eval_precision": 0.6836914406481155,
+ "eval_precision_macro": 0.8073814027769664,
+ "eval_pred_class_0": 16829,
+ "eval_pred_class_1": 2839,
+ "eval_predicted_binding_ratio": 0.14434614602399837,
+ "eval_recall": 0.6259271202837794,
+ "eval_recall_macro": 0.7858614897610121,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.649,
+ "eval_steps_per_second": 3.857,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2704
+ },
+ {
+ "epoch": 105.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9186085224340037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6536519690339953,
+ "eval_f1_macro": 0.7960120658489734,
+ "eval_loss": 0.27707138657569885,
+ "eval_pr_auc": 0.6400560131071933,
+ "eval_precision": 0.6835621260119676,
+ "eval_precision_macro": 0.8073423632971826,
+ "eval_pred_class_0": 16827,
+ "eval_pred_class_1": 2841,
+ "eval_predicted_binding_ratio": 0.1444478340451495,
+ "eval_recall": 0.6262495969042244,
+ "eval_recall_macro": 0.7859925475919686,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.533,
+ "eval_steps_per_second": 3.985,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2730
+ },
+ {
+ "epoch": 106.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9187703544266627,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6535353535353535,
+ "eval_f1_macro": 0.7959556034654849,
+ "eval_loss": 0.27683117985725403,
+ "eval_pr_auc": 0.6407556071793965,
+ "eval_precision": 0.6836914406481155,
+ "eval_precision_macro": 0.8073814027769664,
+ "eval_pred_class_0": 16829,
+ "eval_pred_class_1": 2839,
+ "eval_predicted_binding_ratio": 0.14434614602399837,
+ "eval_recall": 0.6259271202837794,
+ "eval_recall_macro": 0.7858614897610121,
+ "eval_runtime": 0.3724,
+ "eval_samples_per_second": 437.667,
+ "eval_steps_per_second": 2.685,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2756
+ },
+ {
+ "epoch": 107.0,
+ "eval_accuracy": 0.8955664022778117,
+ "eval_auc": 0.918983612943811,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6543251430494783,
+ "eval_f1_macro": 0.7964085438550979,
+ "eval_loss": 0.2766495645046234,
+ "eval_pr_auc": 0.6413517959683596,
+ "eval_precision": 0.6842661034846885,
+ "eval_precision_macro": 0.8077537803332993,
+ "eval_pred_class_0": 16827,
+ "eval_pred_class_1": 2841,
+ "eval_predicted_binding_ratio": 0.1444478340451495,
+ "eval_recall": 0.6268945501451145,
+ "eval_recall_macro": 0.7863753851709456,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.95,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2782
+ },
+ {
+ "epoch": 108.0,
+ "eval_accuracy": 0.8957189343095383,
+ "eval_auc": 0.9190842761805244,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655583543240974,
+ "eval_f1_macro": 0.7970707027489733,
+ "eval_loss": 0.27643415331840515,
+ "eval_pr_auc": 0.6415079341486267,
+ "eval_precision": 0.6839523475823406,
+ "eval_precision_macro": 0.8078082185158045,
+ "eval_pred_class_0": 16814,
+ "eval_pred_class_1": 2854,
+ "eval_predicted_binding_ratio": 0.1451088061826317,
+ "eval_recall": 0.6294743631086747,
+ "eval_recall_macro": 0.7875143892563956,
+ "eval_runtime": 0.2441,
+ "eval_samples_per_second": 667.885,
+ "eval_steps_per_second": 4.097,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2808
+ },
+ {
+ "epoch": 109.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9192001707781057,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6554565327055658,
+ "eval_f1_macro": 0.7970445082288499,
+ "eval_loss": 0.2763550579547882,
+ "eval_pr_auc": 0.6419306315808602,
+ "eval_precision": 0.6848208011243851,
+ "eval_precision_macro": 0.8081695255176081,
+ "eval_pred_class_0": 16822,
+ "eval_pred_class_1": 2846,
+ "eval_predicted_binding_ratio": 0.14470205409802725,
+ "eval_recall": 0.6285069332473395,
+ "eval_recall_macro": 0.7871815767220581,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.285,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2834
+ },
+ {
+ "epoch": 110.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9193565525713485,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655919395465995,
+ "eval_f1_macro": 0.7972685860227431,
+ "eval_loss": 0.27626872062683105,
+ "eval_pr_auc": 0.6423732230660918,
+ "eval_precision": 0.684302733006307,
+ "eval_precision_macro": 0.8080131483516131,
+ "eval_pred_class_0": 16814,
+ "eval_pred_class_1": 2854,
+ "eval_predicted_binding_ratio": 0.1451088061826317,
+ "eval_recall": 0.6297968397291196,
+ "eval_recall_macro": 0.7877058080458841,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.337,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2860
+ },
+ {
+ "epoch": 111.0,
+ "eval_accuracy": 0.8959223103518406,
+ "eval_auc": 0.9193956188221624,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.656485987581809,
+ "eval_f1_macro": 0.7975781647169913,
+ "eval_loss": 0.2761881351470947,
+ "eval_pr_auc": 0.6423085789727141,
+ "eval_precision": 0.6843946815955213,
+ "eval_precision_macro": 0.8081402319339891,
+ "eval_pred_class_0": 16810,
+ "eval_pred_class_1": 2858,
+ "eval_predicted_binding_ratio": 0.1453121822249339,
+ "eval_recall": 0.6307642695904547,
+ "eval_recall_macro": 0.7881593424972857,
+ "eval_runtime": 0.2618,
+ "eval_samples_per_second": 622.648,
+ "eval_steps_per_second": 3.82,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2886
+ },
+ {
+ "epoch": 112.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9196281671522437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6564757265244414,
+ "eval_f1_macro": 0.7976085010119736,
+ "eval_loss": 0.2759994864463806,
+ "eval_pr_auc": 0.6431408853405497,
+ "eval_precision": 0.685133239831697,
+ "eval_precision_macro": 0.808462195558094,
+ "eval_pred_class_0": 16816,
+ "eval_pred_class_1": 2852,
+ "eval_predicted_binding_ratio": 0.14500711816148057,
+ "eval_recall": 0.6301193163495646,
+ "eval_recall_macro": 0.7879575877939047,
+ "eval_runtime": 0.2423,
+ "eval_samples_per_second": 672.612,
+ "eval_steps_per_second": 4.126,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2912
+ },
+ {
+ "epoch": 113.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9197686265771928,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6561291407432319,
+ "eval_f1_macro": 0.797440712214738,
+ "eval_loss": 0.27580633759498596,
+ "eval_pr_auc": 0.6436439478836922,
+ "eval_precision": 0.685523541813071,
+ "eval_precision_macro": 0.8085803418255701,
+ "eval_pred_class_0": 16822,
+ "eval_pred_class_1": 2846,
+ "eval_predicted_binding_ratio": 0.14470205409802725,
+ "eval_recall": 0.6291518864882296,
+ "eval_recall_macro": 0.7875644143010352,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.943,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2938
+ },
+ {
+ "epoch": 114.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9199272079152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6573731944910984,
+ "eval_f1_macro": 0.7981312081136818,
+ "eval_loss": 0.27558717131614685,
+ "eval_pr_auc": 0.6441453864761489,
+ "eval_precision": 0.6859446196985629,
+ "eval_precision_macro": 0.8089550633431857,
+ "eval_pred_class_0": 16815,
+ "eval_pred_class_1": 2853,
+ "eval_predicted_binding_ratio": 0.14505796217205613,
+ "eval_recall": 0.6310867462108997,
+ "eval_recall_macro": 0.7885016636831041,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.272,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2964
+ },
+ {
+ "epoch": 115.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9200543727465736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6571428571428571,
+ "eval_f1_macro": 0.7980197003020941,
+ "eval_loss": 0.2754935324192047,
+ "eval_pr_auc": 0.6445859889828064,
+ "eval_precision": 0.6862056862056862,
+ "eval_precision_macro": 0.8090342302245508,
+ "eval_pred_class_0": 16819,
+ "eval_pred_class_1": 2849,
+ "eval_predicted_binding_ratio": 0.1448545861297539,
+ "eval_recall": 0.6304417929700097,
+ "eval_recall_macro": 0.7882395480211912,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.903,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2990
+ },
+ {
+ "epoch": 115.38461538461539,
+ "grad_norm": 13551.1435546875,
+ "learning_rate": 9.488660254357756e-07,
+ "loss": 0.2594,
+ "step": 3000
+ },
+ {
+ "epoch": 116.0,
+ "eval_accuracy": 0.8964815944681717,
+ "eval_auc": 0.9201159794649721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6579301075268817,
+ "eval_f1_macro": 0.7984714041109127,
+ "eval_loss": 0.2753925323486328,
+ "eval_pr_auc": 0.6447630589609926,
+ "eval_precision": 0.6867765696246931,
+ "eval_precision_macro": 0.8094048157037065,
+ "eval_pred_class_0": 16817,
+ "eval_pred_class_1": 2851,
+ "eval_predicted_binding_ratio": 0.14495627415090503,
+ "eval_recall": 0.6314092228313447,
+ "eval_recall_macro": 0.7887534434311247,
+ "eval_runtime": 0.2556,
+ "eval_samples_per_second": 637.638,
+ "eval_steps_per_second": 3.912,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3016
+ },
+ {
+ "epoch": 117.0,
+ "eval_accuracy": 0.8964815944681717,
+ "eval_auc": 0.920259670079575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6579301075268817,
+ "eval_f1_macro": 0.7984714041109127,
+ "eval_loss": 0.27515658736228943,
+ "eval_pr_auc": 0.6451647424161069,
+ "eval_precision": 0.6867765696246931,
+ "eval_precision_macro": 0.8094048157037065,
+ "eval_pred_class_0": 16817,
+ "eval_pred_class_1": 2851,
+ "eval_predicted_binding_ratio": 0.14495627415090503,
+ "eval_recall": 0.6314092228313447,
+ "eval_recall_macro": 0.7887534434311247,
+ "eval_runtime": 0.2523,
+ "eval_samples_per_second": 645.985,
+ "eval_steps_per_second": 3.963,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3042
+ },
+ {
+ "epoch": 118.0,
+ "eval_accuracy": 0.8963799064470206,
+ "eval_auc": 0.920327525062304,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6583975863224941,
+ "eval_f1_macro": 0.7986623832121911,
+ "eval_loss": 0.2750197649002075,
+ "eval_pr_auc": 0.6454382852682906,
+ "eval_precision": 0.6855148342059337,
+ "eval_precision_macro": 0.8089241730394068,
+ "eval_pred_class_0": 16803,
+ "eval_pred_class_1": 2865,
+ "eval_predicted_binding_ratio": 0.14566809029896277,
+ "eval_recall": 0.6333440825540149,
+ "eval_recall_macro": 0.7894794294583318,
+ "eval_runtime": 0.2545,
+ "eval_samples_per_second": 640.495,
+ "eval_steps_per_second": 3.929,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3068
+ },
+ {
+ "epoch": 119.0,
+ "eval_accuracy": 0.8963799064470206,
+ "eval_auc": 0.9205032745284716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6585120643431636,
+ "eval_f1_macro": 0.7987177919414212,
+ "eval_loss": 0.27491119503974915,
+ "eval_pr_auc": 0.6460050422984182,
+ "eval_precision": 0.6853854202999651,
+ "eval_precision_macro": 0.8088851986923312,
+ "eval_pred_class_0": 16801,
+ "eval_pred_class_1": 2867,
+ "eval_predicted_binding_ratio": 0.1457697783201139,
+ "eval_recall": 0.6336665591744598,
+ "eval_recall_macro": 0.7896104872892882,
+ "eval_runtime": 0.2295,
+ "eval_samples_per_second": 710.293,
+ "eval_steps_per_second": 4.358,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3094
+ },
+ {
+ "epoch": 120.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9205394599595942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6587487453997992,
+ "eval_f1_macro": 0.7987969999557303,
+ "eval_loss": 0.2747833728790283,
+ "eval_pr_auc": 0.6459576422229408,
+ "eval_precision": 0.6843934654153633,
+ "eval_precision_macro": 0.8084881983738124,
+ "eval_pred_class_0": 16791,
+ "eval_pred_class_1": 2877,
+ "eval_predicted_binding_ratio": 0.14627821842586944,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7900743576545822,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 619.079,
+ "eval_steps_per_second": 3.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3120
+ },
+ {
+ "epoch": 121.0,
+ "eval_accuracy": 0.8966341264998983,
+ "eval_auc": 0.9206969317927203,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6591785414920369,
+ "eval_f1_macro": 0.7991286912009045,
+ "eval_loss": 0.2746541202068329,
+ "eval_pr_auc": 0.6466379382676535,
+ "eval_precision": 0.6864525139664804,
+ "eval_precision_macro": 0.8094545359644352,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.6339890357949048,
+ "eval_recall_macro": 0.7898924475165747,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.354,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3146
+ },
+ {
+ "epoch": 122.0,
+ "eval_accuracy": 0.8966341264998983,
+ "eval_auc": 0.9208301697034432,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6592927769398358,
+ "eval_f1_macro": 0.7991839832435101,
+ "eval_loss": 0.2745382785797119,
+ "eval_pr_auc": 0.6470920919458031,
+ "eval_precision": 0.6863224005582693,
+ "eval_precision_macro": 0.809415217658018,
+ "eval_pred_class_0": 16802,
+ "eval_pred_class_1": 2866,
+ "eval_predicted_binding_ratio": 0.14571893430953833,
+ "eval_recall": 0.6343115124153499,
+ "eval_recall_macro": 0.7900235053475313,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.372,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3172
+ },
+ {
+ "epoch": 123.0,
+ "eval_accuracy": 0.8967358145210494,
+ "eval_auc": 0.9209352222971863,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6597420003350645,
+ "eval_f1_macro": 0.7994367387715422,
+ "eval_loss": 0.2744734585285187,
+ "eval_pr_auc": 0.6474021950727136,
+ "eval_precision": 0.6865411436541143,
+ "eval_precision_macro": 0.809580095636581,
+ "eval_pred_class_0": 16800,
+ "eval_pred_class_1": 2868,
+ "eval_predicted_binding_ratio": 0.14582062233068943,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7903459819679763,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 641.926,
+ "eval_steps_per_second": 3.938,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3198
+ },
+ {
+ "epoch": 124.0,
+ "eval_accuracy": 0.896786658531625,
+ "eval_auc": 0.9210344159265571,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6599664991624791,
+ "eval_f1_macro": 0.7995630613656908,
+ "eval_loss": 0.2742863893508911,
+ "eval_pr_auc": 0.6479185906925482,
+ "eval_precision": 0.6866504008365284,
+ "eval_precision_macro": 0.8096624823993346,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.6352789422766849,
+ "eval_recall_macro": 0.7905072202781989,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.863,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3224
+ },
+ {
+ "epoch": 125.0,
+ "eval_accuracy": 0.896888346552776,
+ "eval_auc": 0.9211382130279347,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6603015075376885,
+ "eval_f1_macro": 0.7997605361820792,
+ "eval_loss": 0.27425193786621094,
+ "eval_pr_auc": 0.6481518613470144,
+ "eval_precision": 0.6869989543394911,
+ "eval_precision_macro": 0.8098665228272252,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.63560141889713,
+ "eval_recall_macro": 0.7906986390676873,
+ "eval_runtime": 0.2655,
+ "eval_samples_per_second": 613.835,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3250
+ },
+ {
+ "epoch": 126.0,
+ "eval_accuracy": 0.896888346552776,
+ "eval_auc": 0.9212811737051158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6598456893659845,
+ "eval_f1_macro": 0.7995399118610351,
+ "eval_loss": 0.2741680145263672,
+ "eval_pr_auc": 0.6487724280436702,
+ "eval_precision": 0.6875218455085634,
+ "eval_precision_macro": 0.8100249793973471,
+ "eval_pred_class_0": 16807,
+ "eval_pred_class_1": 2861,
+ "eval_predicted_binding_ratio": 0.14546471425666058,
+ "eval_recall": 0.6343115124153499,
+ "eval_recall_macro": 0.7901744077438613,
+ "eval_runtime": 0.2775,
+ "eval_samples_per_second": 587.468,
+ "eval_steps_per_second": 3.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3276
+ },
+ {
+ "epoch": 127.0,
+ "eval_accuracy": 0.8966849705104739,
+ "eval_auc": 0.9212962201485035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6608811748998665,
+ "eval_f1_macro": 0.7999703379297798,
+ "eval_loss": 0.2739817500114441,
+ "eval_pr_auc": 0.648761125236648,
+ "eval_precision": 0.6848841231407817,
+ "eval_precision_macro": 0.809033228048307,
+ "eval_pred_class_0": 16777,
+ "eval_pred_class_1": 2891,
+ "eval_predicted_binding_ratio": 0.1469900345739272,
+ "eval_recall": 0.6385037084811351,
+ "eval_recall_macro": 0.7917574376292318,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 634.003,
+ "eval_steps_per_second": 3.89,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3302
+ },
+ {
+ "epoch": 128.0,
+ "eval_accuracy": 0.8971425666056538,
+ "eval_auc": 0.9213870048987755,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6608549874266555,
+ "eval_f1_macro": 0.8001167448595325,
+ "eval_loss": 0.2739529609680176,
+ "eval_pr_auc": 0.6489216955933252,
+ "eval_precision": 0.6881983240223464,
+ "eval_precision_macro": 0.8104762150937725,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.63560141889713,
+ "eval_recall_macro": 0.7908495414640173,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.828,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3328
+ },
+ {
+ "epoch": 129.0,
+ "eval_accuracy": 0.8970408785845028,
+ "eval_auc": 0.9215601460552225,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6604058359885964,
+ "eval_f1_macro": 0.7998640212813866,
+ "eval_loss": 0.27383002638816833,
+ "eval_pr_auc": 0.6495277773578755,
+ "eval_precision": 0.6879804332634522,
+ "eval_precision_macro": 0.8103117684584547,
+ "eval_pred_class_0": 16806,
+ "eval_pred_class_1": 2862,
+ "eval_predicted_binding_ratio": 0.1455155582672361,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7905270648435724,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.602,
+ "eval_steps_per_second": 3.752,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3354
+ },
+ {
+ "epoch": 130.0,
+ "eval_accuracy": 0.8969900345739272,
+ "eval_auc": 0.9216559138449605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6610906657745065,
+ "eval_f1_macro": 0.8001778048579948,
+ "eval_loss": 0.27363157272338867,
+ "eval_pr_auc": 0.6497825902519792,
+ "eval_precision": 0.6868265554396942,
+ "eval_precision_macro": 0.8099131883862756,
+ "eval_pred_class_0": 16791,
+ "eval_pred_class_1": 2877,
+ "eval_predicted_binding_ratio": 0.14627821842586944,
+ "eval_recall": 0.6372138019993551,
+ "eval_recall_macro": 0.7914142891810019,
+ "eval_runtime": 0.2147,
+ "eval_samples_per_second": 759.086,
+ "eval_steps_per_second": 4.657,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3380
+ },
+ {
+ "epoch": 131.0,
+ "eval_accuracy": 0.8970917225950783,
+ "eval_auc": 0.9217394576160085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6619906479625919,
+ "eval_f1_macro": 0.800648676506185,
+ "eval_loss": 0.27347350120544434,
+ "eval_pr_auc": 0.6501280761818352,
+ "eval_precision": 0.686525805334257,
+ "eval_precision_macro": 0.8099216238398834,
+ "eval_pred_class_0": 16781,
+ "eval_pred_class_1": 2887,
+ "eval_predicted_binding_ratio": 0.14678665853162498,
+ "eval_recall": 0.6391486617220251,
+ "eval_recall_macro": 0.7922609971252728,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.642,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3406
+ },
+ {
+ "epoch": 132.0,
+ "eval_accuracy": 0.8970917225950783,
+ "eval_auc": 0.9218276924515536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6609715242881072,
+ "eval_f1_macro": 0.8001554858148563,
+ "eval_loss": 0.2734222412109375,
+ "eval_pr_auc": 0.6505561229387223,
+ "eval_precision": 0.6876960613454165,
+ "eval_precision_macro": 0.8102746036830064,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.63624637213802,
+ "eval_recall_macro": 0.7910814766466644,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.527,
+ "eval_steps_per_second": 3.893,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3432
+ },
+ {
+ "epoch": 133.0,
+ "eval_accuracy": 0.8976001627008339,
+ "eval_auc": 0.9219583415175538,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6620805369127517,
+ "eval_f1_macro": 0.8008688878235859,
+ "eval_loss": 0.273334801197052,
+ "eval_pr_auc": 0.6510448354697362,
+ "eval_precision": 0.6901014340678558,
+ "eval_precision_macro": 0.8114972635268781,
+ "eval_pred_class_0": 16809,
+ "eval_pred_class_1": 2859,
+ "eval_predicted_binding_ratio": 0.14536302623550945,
+ "eval_recall": 0.63624637213802,
+ "eval_recall_macro": 0.7913832814393245,
+ "eval_runtime": 0.2542,
+ "eval_samples_per_second": 641.276,
+ "eval_steps_per_second": 3.934,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3458
+ },
+ {
+ "epoch": 134.0,
+ "eval_accuracy": 0.8976001627008339,
+ "eval_auc": 0.92201595791138,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6632107023411371,
+ "eval_f1_macro": 0.8014158800109571,
+ "eval_loss": 0.2731546461582184,
+ "eval_pr_auc": 0.6511084632800272,
+ "eval_precision": 0.6887808266759291,
+ "eval_precision_macro": 0.8110948031169865,
+ "eval_pred_class_0": 16789,
+ "eval_pred_class_1": 2879,
+ "eval_predicted_binding_ratio": 0.14637990644702054,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7926938597488895,
+ "eval_runtime": 0.2551,
+ "eval_samples_per_second": 638.927,
+ "eval_steps_per_second": 3.92,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3484
+ },
+ {
+ "epoch": 134.6153846153846,
+ "grad_norm": 16295.5498046875,
+ "learning_rate": 9.153428025759045e-07,
+ "loss": 0.2515,
+ "step": 3500
+ },
+ {
+ "epoch": 135.0,
+ "eval_accuracy": 0.8977526947325605,
+ "eval_auc": 0.9221653809678686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.663543583737661,
+ "eval_f1_macro": 0.8016300010477627,
+ "eval_loss": 0.2730526030063629,
+ "eval_pr_auc": 0.6517474669432921,
+ "eval_precision": 0.6894993045897079,
+ "eval_precision_macro": 0.8114599905511665,
+ "eval_pred_class_0": 16792,
+ "eval_pred_class_1": 2876,
+ "eval_predicted_binding_ratio": 0.14622737441529388,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7927844011866875,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.318,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3510
+ },
+ {
+ "epoch": 136.0,
+ "eval_accuracy": 0.8976510067114094,
+ "eval_auc": 0.9222144814251072,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6634342083263668,
+ "eval_f1_macro": 0.8015417181640828,
+ "eval_loss": 0.27299538254737854,
+ "eval_pr_auc": 0.6516584418617962,
+ "eval_precision": 0.6888888888888889,
+ "eval_precision_macro": 0.8111766341037249,
+ "eval_pred_class_0": 16788,
+ "eval_pred_class_1": 2880,
+ "eval_predicted_binding_ratio": 0.1464307504575961,
+ "eval_recall": 0.6397936149629152,
+ "eval_recall_macro": 0.792855098059112,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.888,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3536
+ },
+ {
+ "epoch": 137.0,
+ "eval_accuracy": 0.8978543827537117,
+ "eval_auc": 0.922406970789481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6637656903765691,
+ "eval_f1_macro": 0.8017728364954996,
+ "eval_loss": 0.27294018864631653,
+ "eval_pr_auc": 0.6525345183514315,
+ "eval_precision": 0.6899791231732777,
+ "eval_precision_macro": 0.8117038643138033,
+ "eval_pred_class_0": 16794,
+ "eval_pred_class_1": 2874,
+ "eval_predicted_binding_ratio": 0.14612568639414278,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7928447621452195,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.531,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3562
+ },
+ {
+ "epoch": 138.0,
+ "eval_accuracy": 0.8979560707748627,
+ "eval_auc": 0.9224206449505158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6643251379829402,
+ "eval_f1_macro": 0.8020789283763069,
+ "eval_loss": 0.272890567779541,
+ "eval_pr_auc": 0.652455646685698,
+ "eval_precision": 0.6900625434329395,
+ "eval_precision_macro": 0.8118269834496443,
+ "eval_pred_class_0": 16790,
+ "eval_pred_class_1": 2878,
+ "eval_predicted_binding_ratio": 0.14632906243644497,
+ "eval_recall": 0.6404385682038052,
+ "eval_recall_macro": 0.793298296596621,
+ "eval_runtime": 0.2465,
+ "eval_samples_per_second": 661.298,
+ "eval_steps_per_second": 4.057,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3588
+ },
+ {
+ "epoch": 139.0,
+ "eval_accuracy": 0.8980069147854383,
+ "eval_auc": 0.9225580970332872,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6647727272727273,
+ "eval_f1_macro": 0.802313204605421,
+ "eval_loss": 0.2726689577102661,
+ "eval_pr_auc": 0.6528952820360587,
+ "eval_precision": 0.6899063475546305,
+ "eval_precision_macro": 0.8118283599554506,
+ "eval_pred_class_0": 16785,
+ "eval_pred_class_1": 2883,
+ "eval_predicted_binding_ratio": 0.14658328248932276,
+ "eval_recall": 0.6414059980651403,
+ "eval_recall_macro": 0.7937216505687565,
+ "eval_runtime": 0.1981,
+ "eval_samples_per_second": 822.798,
+ "eval_steps_per_second": 5.048,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3614
+ },
+ {
+ "epoch": 140.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9226611836622408,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6642078792958928,
+ "eval_f1_macro": 0.8020928521767887,
+ "eval_loss": 0.2726409435272217,
+ "eval_pr_auc": 0.6533512747607447,
+ "eval_precision": 0.6916899441340782,
+ "eval_precision_macro": 0.8125195733524473,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.6388261851015802,
+ "eval_recall_macro": 0.7927637293589026,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.759,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3640
+ },
+ {
+ "epoch": 141.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.9227806014244446,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.666110183639399,
+ "eval_f1_macro": 0.8030664874893451,
+ "eval_loss": 0.2725253105163574,
+ "eval_pr_auc": 0.6539162157851042,
+ "eval_precision": 0.6905503634475597,
+ "eval_precision_macro": 0.8123173177271173,
+ "eval_pred_class_0": 16779,
+ "eval_pred_class_1": 2889,
+ "eval_predicted_binding_ratio": 0.14688834655277608,
+ "eval_recall": 0.6433408577878104,
+ "eval_recall_macro": 0.7946890804300916,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.673,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3666
+ },
+ {
+ "epoch": 142.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9228251470721713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6665556850341269,
+ "eval_f1_macro": 0.8032289361592369,
+ "eval_loss": 0.27245020866394043,
+ "eval_pr_auc": 0.6537875546315605,
+ "eval_precision": 0.6889194769442533,
+ "eval_precision_macro": 0.8116772542816959,
+ "eval_pred_class_0": 16762,
+ "eval_pred_class_1": 2906,
+ "eval_predicted_binding_ratio": 0.1477526947325605,
+ "eval_recall": 0.6455981941309256,
+ "eval_recall_macro": 0.795515943808989,
+ "eval_runtime": 0.2562,
+ "eval_samples_per_second": 636.157,
+ "eval_steps_per_second": 3.903,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3692
+ },
+ {
+ "epoch": 143.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.9229446329618678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6663324979114453,
+ "eval_f1_macro": 0.8032271166958205,
+ "eval_loss": 0.2723686695098877,
+ "eval_pr_auc": 0.6543603067705216,
+ "eval_precision": 0.6914008321775312,
+ "eval_precision_macro": 0.8127225800544472,
+ "eval_pred_class_0": 16784,
+ "eval_pred_class_1": 2884,
+ "eval_predicted_binding_ratio": 0.14663412649989832,
+ "eval_recall": 0.6430183811673653,
+ "eval_recall_macro": 0.7946485640369331,
+ "eval_runtime": 0.2531,
+ "eval_samples_per_second": 644.012,
+ "eval_steps_per_second": 3.951,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3718
+ },
+ {
+ "epoch": 144.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9229539761608667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6658882402001668,
+ "eval_f1_macro": 0.8029060288610683,
+ "eval_loss": 0.2724270820617676,
+ "eval_pr_auc": 0.6540924505284794,
+ "eval_precision": 0.6897028334485141,
+ "eval_precision_macro": 0.8119135366717949,
+ "eval_pred_class_0": 16774,
+ "eval_pred_class_1": 2894,
+ "eval_predicted_binding_ratio": 0.14714256660565386,
+ "eval_recall": 0.6436633344082554,
+ "eval_recall_macro": 0.7947295968232501,
+ "eval_runtime": 0.2578,
+ "eval_samples_per_second": 632.29,
+ "eval_steps_per_second": 3.879,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3744
+ },
+ {
+ "epoch": 145.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.923075982767793,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671105193075899,
+ "eval_f1_macro": 0.8035504588856721,
+ "eval_loss": 0.27218085527420044,
+ "eval_pr_auc": 0.6548951387334131,
+ "eval_precision": 0.6893704850361198,
+ "eval_precision_macro": 0.8119604647601695,
+ "eval_pred_class_0": 16761,
+ "eval_pred_class_1": 2907,
+ "eval_predicted_binding_ratio": 0.14780353874313606,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7958686009087,
+ "eval_runtime": 0.2047,
+ "eval_samples_per_second": 796.353,
+ "eval_steps_per_second": 4.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3770
+ },
+ {
+ "epoch": 146.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.9231917800403848,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671111851975329,
+ "eval_f1_macro": 0.8036038872863508,
+ "eval_loss": 0.27211907505989075,
+ "eval_pr_auc": 0.6553363499797747,
+ "eval_precision": 0.6904761904761905,
+ "eval_precision_macro": 0.8124414345344577,
+ "eval_pred_class_0": 16770,
+ "eval_pred_class_1": 2898,
+ "eval_predicted_binding_ratio": 0.14734594264795606,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7955659688536286,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 629.971,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3796
+ },
+ {
+ "epoch": 147.0,
+ "eval_accuracy": 0.8985661989017694,
+ "eval_auc": 0.9232496689441817,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6676661669165417,
+ "eval_f1_macro": 0.8039077842052783,
+ "eval_loss": 0.2721000015735626,
+ "eval_pr_auc": 0.655340056976625,
+ "eval_precision": 0.6905582356995176,
+ "eval_precision_macro": 0.812564099359958,
+ "eval_pred_class_0": 16766,
+ "eval_pred_class_1": 2902,
+ "eval_predicted_binding_ratio": 0.14754931869025828,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7960195033050301,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.767,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3822
+ },
+ {
+ "epoch": 148.0,
+ "eval_accuracy": 0.8985661989017694,
+ "eval_auc": 0.9233701767462686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6673336668334167,
+ "eval_f1_macro": 0.8037469198020228,
+ "eval_loss": 0.2719952464103699,
+ "eval_pr_auc": 0.6558934902075464,
+ "eval_precision": 0.6909530386740331,
+ "eval_precision_macro": 0.8126837695158862,
+ "eval_pred_class_0": 16772,
+ "eval_pred_class_1": 2896,
+ "eval_predicted_binding_ratio": 0.14724425462680496,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7956263298121606,
+ "eval_runtime": 0.2562,
+ "eval_samples_per_second": 636.275,
+ "eval_steps_per_second": 3.904,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3848
+ },
+ {
+ "epoch": 149.0,
+ "eval_accuracy": 0.8983628228594671,
+ "eval_auc": 0.9234171749837325,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6676641729010806,
+ "eval_f1_macro": 0.8038359878940744,
+ "eval_loss": 0.27190613746643066,
+ "eval_pr_auc": 0.6558672612955618,
+ "eval_precision": 0.6890871654083733,
+ "eval_precision_macro": 0.8119245066626444,
+ "eval_pred_class_0": 16754,
+ "eval_pred_class_1": 2914,
+ "eval_predicted_binding_ratio": 0.14815944681716495,
+ "eval_recall": 0.6475330538535956,
+ "eval_recall_macro": 0.796423012711792,
+ "eval_runtime": 0.1964,
+ "eval_samples_per_second": 830.09,
+ "eval_steps_per_second": 5.093,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3874
+ },
+ {
+ "epoch": 150.0,
+ "eval_accuracy": 0.8986678869229204,
+ "eval_auc": 0.9235316972989609,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6682204095222241,
+ "eval_f1_macro": 0.8042113179058208,
+ "eval_loss": 0.2718164622783661,
+ "eval_pr_auc": 0.6562427373314145,
+ "eval_precision": 0.6906400550584997,
+ "eval_precision_macro": 0.8126866902186664,
+ "eval_pred_class_0": 16762,
+ "eval_pred_class_1": 2906,
+ "eval_predicted_binding_ratio": 0.1477526947325605,
+ "eval_recall": 0.6472105772331506,
+ "eval_recall_macro": 0.7964730377564316,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.207,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3900
+ },
+ {
+ "epoch": 151.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.923607970893288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.66744379683597,
+ "eval_f1_macro": 0.8037648014211952,
+ "eval_loss": 0.27192452549934387,
+ "eval_pr_auc": 0.6563118529429329,
+ "eval_precision": 0.6900826446280992,
+ "eval_precision_macro": 0.8123224008156005,
+ "eval_pred_class_0": 16764,
+ "eval_pred_class_1": 2904,
+ "eval_predicted_binding_ratio": 0.1476510067114094,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7959591423464981,
+ "eval_runtime": 0.1771,
+ "eval_samples_per_second": 920.181,
+ "eval_steps_per_second": 5.645,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3926
+ },
+ {
+ "epoch": 152.0,
+ "eval_accuracy": 0.8986170429123449,
+ "eval_auc": 0.9236869111923289,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6687707641196013,
+ "eval_f1_macro": 0.8044598207679289,
+ "eval_loss": 0.2716231048107147,
+ "eval_pr_auc": 0.6568319718650024,
+ "eval_precision": 0.6896197327852004,
+ "eval_precision_macro": 0.812330315374629,
+ "eval_pred_class_0": 16749,
+ "eval_pred_class_1": 2919,
+ "eval_predicted_binding_ratio": 0.1484136668700427,
+ "eval_recall": 0.6491454369558207,
+ "eval_recall_macro": 0.7972292042629046,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.369,
+ "eval_steps_per_second": 4.15,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3952
+ },
+ {
+ "epoch": 153.0,
+ "eval_accuracy": 0.8986678869229204,
+ "eval_auc": 0.9238590985638783,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671120761650242,
+ "eval_f1_macro": 0.8036750821318089,
+ "eval_loss": 0.2716236114501953,
+ "eval_pr_auc": 0.657752547087354,
+ "eval_precision": 0.691961191961192,
+ "eval_precision_macro": 0.8130882112827054,
+ "eval_pred_class_0": 16782,
+ "eval_pred_class_1": 2886,
+ "eval_predicted_binding_ratio": 0.14673581452104942,
+ "eval_recall": 0.6439858110287005,
+ "eval_recall_macro": 0.7951624594468667,
+ "eval_runtime": 0.2469,
+ "eval_samples_per_second": 660.315,
+ "eval_steps_per_second": 4.051,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3978
+ },
+ {
+ "epoch": 153.84615384615384,
+ "grad_norm": 13863.017578125,
+ "learning_rate": 8.743443888522679e-07,
+ "loss": 0.244,
+ "step": 4000
+ },
+ {
+ "epoch": 154.0,
+ "eval_accuracy": 0.8986170429123449,
+ "eval_auc": 0.9239073328287097,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6674449633088726,
+ "eval_f1_macro": 0.8038184624582756,
+ "eval_loss": 0.27161940932273865,
+ "eval_pr_auc": 0.6577434889769634,
+ "eval_precision": 0.6911917098445596,
+ "eval_precision_macro": 0.8128050601926549,
+ "eval_pred_class_0": 16773,
+ "eval_pred_class_1": 2895,
+ "eval_predicted_binding_ratio": 0.1471934106162294,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7956565102914266,
+ "eval_runtime": 0.2311,
+ "eval_samples_per_second": 705.281,
+ "eval_steps_per_second": 4.327,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4004
+ },
+ {
+ "epoch": 155.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.9240325316952941,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8033357331413487,
+ "eval_loss": 0.2715211510658264,
+ "eval_pr_auc": 0.6581299979478261,
+ "eval_precision": 0.689893066574681,
+ "eval_precision_macro": 0.812118099868532,
+ "eval_pred_class_0": 16769,
+ "eval_pred_class_1": 2899,
+ "eval_predicted_binding_ratio": 0.14739678665853162,
+ "eval_recall": 0.6449532408900355,
+ "eval_recall_macro": 0.7953443695848741,
+ "eval_runtime": 0.2265,
+ "eval_samples_per_second": 719.735,
+ "eval_steps_per_second": 4.416,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4030
+ },
+ {
+ "epoch": 156.0,
+ "eval_accuracy": 0.8987187309334961,
+ "eval_auc": 0.9240861966945435,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6688829787234043,
+ "eval_f1_macro": 0.8045495325789891,
+ "eval_loss": 0.27123013138771057,
+ "eval_pr_auc": 0.6585643355556502,
+ "eval_precision": 0.6902229845626072,
+ "eval_precision_macro": 0.8126098507842583,
+ "eval_pred_class_0": 16753,
+ "eval_pred_class_1": 2915,
+ "eval_predicted_binding_ratio": 0.14821029082774048,
+ "eval_recall": 0.6488229603353757,
+ "eval_recall_macro": 0.7971585073904801,
+ "eval_runtime": 0.2445,
+ "eval_samples_per_second": 666.601,
+ "eval_steps_per_second": 4.09,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4056
+ },
+ {
+ "epoch": 157.0,
+ "eval_accuracy": 0.8987695749440716,
+ "eval_auc": 0.9242226073999265,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.668111351891982,
+ "eval_f1_macro": 0.8041939607346642,
+ "eval_loss": 0.2712385952472687,
+ "eval_pr_auc": 0.6591839305732792,
+ "eval_precision": 0.6915113871635611,
+ "eval_precision_macro": 0.8130484783164258,
+ "eval_pred_class_0": 16770,
+ "eval_pred_class_1": 2898,
+ "eval_predicted_binding_ratio": 0.14734594264795606,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7961402252220942,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 634.046,
+ "eval_steps_per_second": 3.89,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4082
+ },
+ {
+ "epoch": 158.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9241500613527003,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6699883740242485,
+ "eval_f1_macro": 0.8051727852411502,
+ "eval_loss": 0.27118799090385437,
+ "eval_pr_auc": 0.6587023192472763,
+ "eval_precision": 0.6907534246575342,
+ "eval_precision_macro": 0.8130146392454138,
+ "eval_pred_class_0": 16748,
+ "eval_pred_class_1": 2920,
+ "eval_predicted_binding_ratio": 0.14846451088061827,
+ "eval_recall": 0.6504353434376008,
+ "eval_recall_macro": 0.7979646989415927,
+ "eval_runtime": 0.2165,
+ "eval_samples_per_second": 752.927,
+ "eval_steps_per_second": 4.619,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4108
+ },
+ {
+ "epoch": 159.0,
+ "eval_accuracy": 0.8990237949969494,
+ "eval_auc": 0.9243365652302152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6693306693306693,
+ "eval_f1_macro": 0.8048723553674049,
+ "eval_loss": 0.2712218463420868,
+ "eval_pr_auc": 0.6594296109425748,
+ "eval_precision": 0.6919104991394148,
+ "eval_precision_macro": 0.8134133417966358,
+ "eval_pred_class_0": 16763,
+ "eval_pred_class_1": 2905,
+ "eval_predicted_binding_ratio": 0.14770185072198494,
+ "eval_recall": 0.6481780070944857,
+ "eval_recall_macro": 0.7970774746041632,
+ "eval_runtime": 0.256,
+ "eval_samples_per_second": 636.627,
+ "eval_steps_per_second": 3.906,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4134
+ },
+ {
+ "epoch": 160.0,
+ "eval_accuracy": 0.8990746390075249,
+ "eval_auc": 0.9243664342695147,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6709762970329852,
+ "eval_f1_macro": 0.805686028587357,
+ "eval_loss": 0.2712063789367676,
+ "eval_pr_auc": 0.6596905428752633,
+ "eval_precision": 0.6903137789904502,
+ "eval_precision_macro": 0.8129807422676916,
+ "eval_pred_class_0": 16736,
+ "eval_pred_class_1": 2932,
+ "eval_predicted_binding_ratio": 0.1490746390075249,
+ "eval_recall": 0.6526926797807159,
+ "eval_recall_macro": 0.7989424647168202,
+ "eval_runtime": 0.1787,
+ "eval_samples_per_second": 912.327,
+ "eval_steps_per_second": 5.597,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4160
+ },
+ {
+ "epoch": 161.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9244822704721024,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6700979578283247,
+ "eval_f1_macro": 0.8052257867669526,
+ "eval_loss": 0.2710643708705902,
+ "eval_pr_auc": 0.6603246471492675,
+ "eval_precision": 0.6906228610540726,
+ "eval_precision_macro": 0.8129753502690642,
+ "eval_pred_class_0": 16746,
+ "eval_pred_class_1": 2922,
+ "eval_predicted_binding_ratio": 0.14856619890176936,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7980957567725492,
+ "eval_runtime": 0.2165,
+ "eval_samples_per_second": 752.853,
+ "eval_steps_per_second": 4.619,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4186
+ },
+ {
+ "epoch": 162.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9245231275027241,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6700979578283247,
+ "eval_f1_macro": 0.8052257867669526,
+ "eval_loss": 0.2710554301738739,
+ "eval_pr_auc": 0.6603878428843051,
+ "eval_precision": 0.6906228610540726,
+ "eval_precision_macro": 0.8129753502690642,
+ "eval_pred_class_0": 16746,
+ "eval_pred_class_1": 2922,
+ "eval_predicted_binding_ratio": 0.14856619890176936,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7980957567725492,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.523,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4212
+ },
+ {
+ "epoch": 163.0,
+ "eval_accuracy": 0.8993288590604027,
+ "eval_auc": 0.9246038585815736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6719681908548708,
+ "eval_f1_macro": 0.8062543656977057,
+ "eval_loss": 0.27094030380249023,
+ "eval_pr_auc": 0.6606722300563197,
+ "eval_precision": 0.6909710391822828,
+ "eval_precision_macro": 0.8134231279100321,
+ "eval_pred_class_0": 16733,
+ "eval_pred_class_1": 2935,
+ "eval_predicted_binding_ratio": 0.14922717103925157,
+ "eval_recall": 0.653982586262496,
+ "eval_recall_macro": 0.7996175984369762,
+ "eval_runtime": 0.2218,
+ "eval_samples_per_second": 735.026,
+ "eval_steps_per_second": 4.509,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4238
+ },
+ {
+ "epoch": 164.0,
+ "eval_accuracy": 0.8992271710392515,
+ "eval_auc": 0.9247262642209572,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6713101160862355,
+ "eval_f1_macro": 0.8059006594362601,
+ "eval_loss": 0.2709755003452301,
+ "eval_pr_auc": 0.6610856256039915,
+ "eval_precision": 0.691020826220553,
+ "eval_precision_macro": 0.8133400325618567,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6526926797807159,
+ "eval_recall_macro": 0.7990330061546183,
+ "eval_runtime": 0.2493,
+ "eval_samples_per_second": 653.878,
+ "eval_steps_per_second": 4.012,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4264
+ },
+ {
+ "epoch": 165.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.9247699826062725,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6738446248136492,
+ "eval_f1_macro": 0.8073568599908361,
+ "eval_loss": 0.2707850933074951,
+ "eval_pr_auc": 0.6613417671448518,
+ "eval_precision": 0.6927792915531336,
+ "eval_precision_macro": 0.8145046350187375,
+ "eval_pred_class_0": 16732,
+ "eval_pred_class_1": 2936,
+ "eval_predicted_binding_ratio": 0.14927801504982713,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8007359306946413,
+ "eval_runtime": 0.1724,
+ "eval_samples_per_second": 945.745,
+ "eval_steps_per_second": 5.802,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4290
+ },
+ {
+ "epoch": 166.0,
+ "eval_accuracy": 0.9001423632296115,
+ "eval_auc": 0.9248060123174118,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6753719008264463,
+ "eval_f1_macro": 0.8081840577256068,
+ "eval_loss": 0.2708885669708252,
+ "eval_pr_auc": 0.6612557604235284,
+ "eval_precision": 0.6927772126144456,
+ "eval_precision_macro": 0.8147479579430862,
+ "eval_pred_class_0": 16719,
+ "eval_pred_class_1": 2949,
+ "eval_predicted_binding_ratio": 0.14993898718730933,
+ "eval_recall": 0.6588197355691713,
+ "eval_recall_macro": 0.8020663535695799,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.671,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4316
+ },
+ {
+ "epoch": 167.0,
+ "eval_accuracy": 0.8999389871873094,
+ "eval_auc": 0.9248204358808662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6741721854304635,
+ "eval_f1_macro": 0.8075329932438238,
+ "eval_loss": 0.270906925201416,
+ "eval_pr_auc": 0.6612342465257918,
+ "eval_precision": 0.692752636951344,
+ "eval_precision_macro": 0.8145453662370445,
+ "eval_pred_class_0": 16729,
+ "eval_pred_class_1": 2939,
+ "eval_predicted_binding_ratio": 0.1494305470815538,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010282268358203,
+ "eval_runtime": 0.2263,
+ "eval_samples_per_second": 720.252,
+ "eval_steps_per_second": 4.419,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4342
+ },
+ {
+ "epoch": 168.0,
+ "eval_accuracy": 0.900091519219036,
+ "eval_auc": 0.9249249239896699,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6750454770960806,
+ "eval_f1_macro": 0.8080084845902765,
+ "eval_loss": 0.2706829011440277,
+ "eval_pr_auc": 0.6618605064537387,
+ "eval_precision": 0.6928038017651053,
+ "eval_precision_macro": 0.8147071275300828,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8017740574284009,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.89,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4368
+ },
+ {
+ "epoch": 169.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9250021027063997,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6742838218248054,
+ "eval_f1_macro": 0.8076047153692607,
+ "eval_loss": 0.2705872058868408,
+ "eval_pr_auc": 0.6621173041985378,
+ "eval_precision": 0.6929884275017019,
+ "eval_precision_macro": 0.8146651641393745,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010584073150864,
+ "eval_runtime": 0.2622,
+ "eval_samples_per_second": 621.716,
+ "eval_steps_per_second": 3.814,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4394
+ },
+ {
+ "epoch": 170.0,
+ "eval_accuracy": 0.900091519219036,
+ "eval_auc": 0.9250495097088196,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6750454770960806,
+ "eval_f1_macro": 0.8080084845902765,
+ "eval_loss": 0.2706546485424042,
+ "eval_pr_auc": 0.6620136434657915,
+ "eval_precision": 0.6928038017651053,
+ "eval_precision_macro": 0.8147071275300828,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8017740574284009,
+ "eval_runtime": 0.2696,
+ "eval_samples_per_second": 604.539,
+ "eval_steps_per_second": 3.709,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4420
+ },
+ {
+ "epoch": 171.0,
+ "eval_accuracy": 0.8999389871873094,
+ "eval_auc": 0.9250856659424456,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6748182419035029,
+ "eval_f1_macro": 0.8078453665953039,
+ "eval_loss": 0.27060601115226746,
+ "eval_pr_auc": 0.6621340116082275,
+ "eval_precision": 0.6919688241274145,
+ "eval_precision_macro": 0.814310068581025,
+ "eval_pred_class_0": 16717,
+ "eval_pred_class_1": 2951,
+ "eval_predicted_binding_ratio": 0.15004067520846046,
+ "eval_recall": 0.6584972589487262,
+ "eval_recall_macro": 0.8018145738215594,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.143,
+ "eval_steps_per_second": 3.958,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4446
+ },
+ {
+ "epoch": 172.0,
+ "eval_accuracy": 0.9000406752084604,
+ "eval_auc": 0.9252334636716082,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6741796486576069,
+ "eval_f1_macro": 0.8075720776469225,
+ "eval_loss": 0.27041611075401306,
+ "eval_pr_auc": 0.6630610344326174,
+ "eval_precision": 0.6934878963518581,
+ "eval_precision_macro": 0.8148646532849819,
+ "eval_pred_class_0": 16735,
+ "eval_pred_class_1": 2933,
+ "eval_predicted_binding_ratio": 0.14912548301810047,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8008264721324394,
+ "eval_runtime": 0.2332,
+ "eval_samples_per_second": 698.832,
+ "eval_steps_per_second": 4.287,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4472
+ },
+ {
+ "epoch": 173.0,
+ "eval_accuracy": 0.8998372991661582,
+ "eval_auc": 0.9253198298673536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6740569159497022,
+ "eval_f1_macro": 0.8074417704823604,
+ "eval_loss": 0.2705075442790985,
+ "eval_pr_auc": 0.662984991174244,
+ "eval_precision": 0.6921508664627931,
+ "eval_precision_macro": 0.8142667635751932,
+ "eval_pred_class_0": 16725,
+ "eval_pred_class_1": 2943,
+ "eval_predicted_binding_ratio": 0.149633923123856,
+ "eval_recall": 0.6568848758465011,
+ "eval_recall_macro": 0.8010989237082449,
+ "eval_runtime": 0.2134,
+ "eval_samples_per_second": 763.793,
+ "eval_steps_per_second": 4.686,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4498
+ },
+ {
+ "epoch": 173.07692307692307,
+ "grad_norm": 15784.1748046875,
+ "learning_rate": 8.266086590174684e-07,
+ "loss": 0.2376,
+ "step": 4500
+ },
+ {
+ "epoch": 174.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9254431016991443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6740679370339685,
+ "eval_f1_macro": 0.8075003208787752,
+ "eval_loss": 0.2703414559364319,
+ "eval_pr_auc": 0.6637127837233647,
+ "eval_precision": 0.6932515337423313,
+ "eval_precision_macro": 0.8147445669189726,
+ "eval_pred_class_0": 16734,
+ "eval_pred_class_1": 2934,
+ "eval_predicted_binding_ratio": 0.14917632702867603,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8007962916531735,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.53,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4524
+ },
+ {
+ "epoch": 175.0,
+ "eval_accuracy": 0.9001423632296115,
+ "eval_auc": 0.9255452150782025,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6746189529489728,
+ "eval_f1_macro": 0.8078199869849969,
+ "eval_loss": 0.27038928866386414,
+ "eval_pr_auc": 0.6639609479782242,
+ "eval_precision": 0.6936967632027258,
+ "eval_precision_macro": 0.8150250385068789,
+ "eval_pred_class_0": 16733,
+ "eval_pred_class_1": 2935,
+ "eval_predicted_binding_ratio": 0.14922717103925157,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8011489487528844,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.051,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4550
+ },
+ {
+ "epoch": 176.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9255934298780361,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6742838218248054,
+ "eval_f1_macro": 0.8076047153692607,
+ "eval_loss": 0.2702932059764862,
+ "eval_pr_auc": 0.6640830183725597,
+ "eval_precision": 0.6929884275017019,
+ "eval_precision_macro": 0.8146651641393745,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010584073150864,
+ "eval_runtime": 0.1918,
+ "eval_samples_per_second": 849.843,
+ "eval_steps_per_second": 5.214,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4576
+ },
+ {
+ "epoch": 177.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.9255964274877148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6745992397950752,
+ "eval_f1_macro": 0.8077217319600283,
+ "eval_loss": 0.27021023631095886,
+ "eval_pr_auc": 0.6640237853140233,
+ "eval_precision": 0.691864406779661,
+ "eval_precision_macro": 0.8142298466485935,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8016533355113369,
+ "eval_runtime": 0.2437,
+ "eval_samples_per_second": 668.962,
+ "eval_steps_per_second": 4.104,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4602
+ },
+ {
+ "epoch": 178.0,
+ "eval_accuracy": 0.900549115314216,
+ "eval_auc": 0.9257395146873824,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6756218905472637,
+ "eval_f1_macro": 0.8084468667292255,
+ "eval_loss": 0.27011793851852417,
+ "eval_pr_auc": 0.6647736112265655,
+ "eval_precision": 0.695459201092523,
+ "eval_precision_macro": 0.8159475347119822,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6568848758465011,
+ "eval_recall_macro": 0.801521450417969,
+ "eval_runtime": 0.2294,
+ "eval_samples_per_second": 710.491,
+ "eval_steps_per_second": 4.359,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4628
+ },
+ {
+ "epoch": 179.0,
+ "eval_accuracy": 0.9003965832824893,
+ "eval_auc": 0.9258087711499611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6755010766937221,
+ "eval_f1_macro": 0.8083352405901716,
+ "eval_loss": 0.2701837122440338,
+ "eval_pr_auc": 0.6649310182754444,
+ "eval_precision": 0.6944822888283378,
+ "eval_precision_macro": 0.8155055479522995,
+ "eval_pred_class_0": 16732,
+ "eval_pred_class_1": 2936,
+ "eval_predicted_binding_ratio": 0.14927801504982713,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.8016930246420839,
+ "eval_runtime": 0.1963,
+ "eval_samples_per_second": 830.526,
+ "eval_steps_per_second": 5.095,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4654
+ },
+ {
+ "epoch": 180.0,
+ "eval_accuracy": 0.9004474272930649,
+ "eval_auc": 0.9258260560681089,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6762566137566137,
+ "eval_f1_macro": 0.8087183092815753,
+ "eval_loss": 0.27005937695503235,
+ "eval_pr_auc": 0.6649666141685525,
+ "eval_precision": 0.6939260264675942,
+ "eval_precision_macro": 0.8153859544454471,
+ "eval_pred_class_0": 16721,
+ "eval_pred_class_1": 2947,
+ "eval_predicted_binding_ratio": 0.14983729916615823,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.802509552107089,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.51,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4680
+ },
+ {
+ "epoch": 181.0,
+ "eval_accuracy": 0.9005999593247915,
+ "eval_auc": 0.9259436830505047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6764851894754261,
+ "eval_f1_macro": 0.8088820685009666,
+ "eval_loss": 0.2699625492095947,
+ "eval_pr_auc": 0.6656768815498022,
+ "eval_precision": 0.6947654656696125,
+ "eval_precision_macro": 0.8157852199805673,
+ "eval_pred_class_0": 16726,
+ "eval_pred_class_1": 2942,
+ "eval_predicted_binding_ratio": 0.14958307911328045,
+ "eval_recall": 0.6591422121896162,
+ "eval_recall_macro": 0.8024690357139305,
+ "eval_runtime": 0.1825,
+ "eval_samples_per_second": 893.316,
+ "eval_steps_per_second": 5.48,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4706
+ },
+ {
+ "epoch": 182.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9259346220939755,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6787778695293146,
+ "eval_f1_macro": 0.8101680579881181,
+ "eval_loss": 0.2698967456817627,
+ "eval_pr_auc": 0.6657214869012321,
+ "eval_precision": 0.6956668923493569,
+ "eval_precision_macro": 0.8165423129929146,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6626894550145115,
+ "eval_recall_macro": 0.8042124766471122,
+ "eval_runtime": 0.2487,
+ "eval_samples_per_second": 655.302,
+ "eval_steps_per_second": 4.02,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4732
+ },
+ {
+ "epoch": 183.0,
+ "eval_accuracy": 0.9010575554199716,
+ "eval_auc": 0.9260415822575143,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6782407407407407,
+ "eval_f1_macro": 0.8098906179070202,
+ "eval_loss": 0.26987963914871216,
+ "eval_pr_auc": 0.6659658035928079,
+ "eval_precision": 0.6959619952494062,
+ "eval_precision_macro": 0.8165833539431051,
+ "eval_pred_class_0": 16721,
+ "eval_pred_class_1": 2947,
+ "eval_predicted_binding_ratio": 0.14983729916615823,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.8036580648440201,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.055,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4758
+ },
+ {
+ "epoch": 184.0,
+ "eval_accuracy": 0.9010575554199716,
+ "eval_auc": 0.9260998701937684,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6781343036718491,
+ "eval_f1_macro": 0.8098391554406106,
+ "eval_loss": 0.26986023783683777,
+ "eval_pr_auc": 0.6661468987350531,
+ "eval_precision": 0.6960950764006791,
+ "eval_precision_macro": 0.8166237506024205,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6610770719122864,
+ "eval_recall_macro": 0.8035270070130636,
+ "eval_runtime": 0.237,
+ "eval_samples_per_second": 687.681,
+ "eval_steps_per_second": 4.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4784
+ },
+ {
+ "epoch": 185.0,
+ "eval_accuracy": 0.9012609314622737,
+ "eval_auc": 0.9262103340569317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6787958981144558,
+ "eval_f1_macro": 0.810230030763446,
+ "eval_loss": 0.26986950635910034,
+ "eval_pr_auc": 0.6665273243194801,
+ "eval_precision": 0.6967741935483871,
+ "eval_precision_macro": 0.8170231070594294,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8039098445920405,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.338,
+ "eval_steps_per_second": 4.229,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4810
+ },
+ {
+ "epoch": 186.0,
+ "eval_accuracy": 0.9014134634940004,
+ "eval_auc": 0.9262454683781669,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6797687861271676,
+ "eval_f1_macro": 0.8107536578092345,
+ "eval_loss": 0.2697572410106659,
+ "eval_pr_auc": 0.666716804071224,
+ "eval_precision": 0.6966824644549763,
+ "eval_precision_macro": 0.8171398441695726,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6636568848758465,
+ "eval_recall_macro": 0.8047867330155776,
+ "eval_runtime": 0.1794,
+ "eval_samples_per_second": 908.74,
+ "eval_steps_per_second": 5.575,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4836
+ },
+ {
+ "epoch": 187.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9263210315000697,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6784592494627211,
+ "eval_f1_macro": 0.810014015033881,
+ "eval_loss": 0.26983824372291565,
+ "eval_pr_auc": 0.6669884807739552,
+ "eval_precision": 0.6960651289009498,
+ "eval_precision_macro": 0.8166629472255945,
+ "eval_pred_class_0": 16720,
+ "eval_pred_class_1": 2948,
+ "eval_predicted_binding_ratio": 0.14988814317673377,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8038193031542425,
+ "eval_runtime": 0.1795,
+ "eval_samples_per_second": 908.044,
+ "eval_steps_per_second": 5.571,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4862
+ },
+ {
+ "epoch": 188.0,
+ "eval_accuracy": 0.9012100874516982,
+ "eval_auc": 0.9264191350895575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6786836447825368,
+ "eval_f1_macro": 0.8101580079180191,
+ "eval_loss": 0.26967287063598633,
+ "eval_pr_auc": 0.667515048707415,
+ "eval_precision": 0.6965376782077393,
+ "eval_precision_macro": 0.8169029737767557,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8038796641127746,
+ "eval_runtime": 0.1851,
+ "eval_samples_per_second": 880.782,
+ "eval_steps_per_second": 5.404,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4888
+ },
+ {
+ "epoch": 189.0,
+ "eval_accuracy": 0.9013626194834249,
+ "eval_auc": 0.9265750497228504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6782752902155887,
+ "eval_f1_macro": 0.8100137635248964,
+ "eval_loss": 0.2697126567363739,
+ "eval_pr_auc": 0.6680073523436961,
+ "eval_precision": 0.698190508706043,
+ "eval_precision_macro": 0.8175521514197519,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.8030528007338771,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.766,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4914
+ },
+ {
+ "epoch": 190.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.9265310393625665,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6808651147432723,
+ "eval_f1_macro": 0.8113902183590456,
+ "eval_loss": 0.26952171325683594,
+ "eval_pr_auc": 0.6676611850618266,
+ "eval_precision": 0.6975642760487145,
+ "eval_precision_macro": 0.8176966904417818,
+ "eval_pred_class_0": 16712,
+ "eval_pred_class_1": 2956,
+ "eval_predicted_binding_ratio": 0.1502948952613382,
+ "eval_recall": 0.6649467913576266,
+ "eval_recall_macro": 0.8054920472149997,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.883,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4940
+ },
+ {
+ "epoch": 191.0,
+ "eval_accuracy": 0.901921903599756,
+ "eval_auc": 0.9266825451738318,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6812097174020822,
+ "eval_f1_macro": 0.8116278420268636,
+ "eval_loss": 0.2694892883300781,
+ "eval_pr_auc": 0.668268169384226,
+ "eval_precision": 0.6986440677966101,
+ "eval_precision_macro": 0.8182178348314311,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6646243147371815,
+ "eval_recall_macro": 0.8054817113011072,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.763,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4966
+ },
+ {
+ "epoch": 192.0,
+ "eval_accuracy": 0.9018710595891803,
+ "eval_auc": 0.9266840147811743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6814130075932651,
+ "eval_f1_macro": 0.8117083668893665,
+ "eval_loss": 0.26940062642097473,
+ "eval_pr_auc": 0.6682450098479531,
+ "eval_precision": 0.6980047345282381,
+ "eval_precision_macro": 0.817974900326174,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6655917445985166,
+ "eval_recall_macro": 0.8058447043147107,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.715,
+ "eval_steps_per_second": 3.949,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4992
+ },
+ {
+ "epoch": 192.30769230769232,
+ "grad_norm": 15858.0107421875,
+ "learning_rate": 7.72994743624204e-07,
+ "loss": 0.2316,
+ "step": 5000
+ },
+ {
+ "epoch": 193.0,
+ "eval_accuracy": 0.9021761236526337,
+ "eval_auc": 0.9268377687996988,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6816677696889477,
+ "eval_f1_macro": 0.8119380540142443,
+ "eval_loss": 0.26932862401008606,
+ "eval_pr_auc": 0.6692251134414691,
+ "eval_precision": 0.6999660210669385,
+ "eval_precision_macro": 0.8188619343002854,
+ "eval_pred_class_0": 16725,
+ "eval_pred_class_1": 2943,
+ "eval_predicted_binding_ratio": 0.149633923123856,
+ "eval_recall": 0.6643018381167365,
+ "eval_recall_macro": 0.8055015558664808,
+ "eval_runtime": 0.1767,
+ "eval_samples_per_second": 922.594,
+ "eval_steps_per_second": 5.66,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5018
+ },
+ {
+ "epoch": 194.0,
+ "eval_accuracy": 0.9022269676632093,
+ "eval_auc": 0.92693509378927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6815697963238947,
+ "eval_f1_macro": 0.8119084228038069,
+ "eval_loss": 0.2693846523761749,
+ "eval_pr_auc": 0.6695232673057094,
+ "eval_precision": 0.7004765146358066,
+ "eval_precision_macro": 0.8190667092007484,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6636568848758465,
+ "eval_recall_macro": 0.8052696206838338,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.258,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5044
+ },
+ {
+ "epoch": 195.0,
+ "eval_accuracy": 0.9022778116737848,
+ "eval_auc": 0.926939395553809,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6825239511067063,
+ "eval_f1_macro": 0.8123875088746679,
+ "eval_loss": 0.269380122423172,
+ "eval_pr_auc": 0.6693235837806535,
+ "eval_precision": 0.6996274974602099,
+ "eval_precision_macro": 0.8188535333546936,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6662366978394066,
+ "eval_recall_macro": 0.8063482638107518,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.818,
+ "eval_steps_per_second": 3.944,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5070
+ },
+ {
+ "epoch": 196.0,
+ "eval_accuracy": 0.9018202155786048,
+ "eval_auc": 0.9269277165550606,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6819304892110031,
+ "eval_f1_macro": 0.8119407443800393,
+ "eval_loss": 0.2693455219268799,
+ "eval_pr_auc": 0.6690854783865479,
+ "eval_precision": 0.696969696969697,
+ "eval_precision_macro": 0.8176128877709905,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6675266043211867,
+ "eval_recall_macro": 0.8066008708211837,
+ "eval_runtime": 0.2679,
+ "eval_samples_per_second": 608.505,
+ "eval_steps_per_second": 3.733,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5096
+ },
+ {
+ "epoch": 197.0,
+ "eval_accuracy": 0.9018202155786048,
+ "eval_auc": 0.926915803976337,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6826622843056697,
+ "eval_f1_macro": 0.8122944214527055,
+ "eval_loss": 0.2691311538219452,
+ "eval_pr_auc": 0.6692617138980786,
+ "eval_precision": 0.6960455764075067,
+ "eval_precision_macro": 0.8173347038115213,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6697839406643018,
+ "eval_recall_macro": 0.8075182756378791,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.485,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5122
+ },
+ {
+ "epoch": 198.0,
+ "eval_accuracy": 0.9023794996949359,
+ "eval_auc": 0.9270834170733764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6822244289970208,
+ "eval_f1_macro": 0.8122781903500151,
+ "eval_loss": 0.26926350593566895,
+ "eval_pr_auc": 0.6700425139918407,
+ "eval_precision": 0.7007820469228153,
+ "eval_precision_macro": 0.8193035600788525,
+ "eval_pred_class_0": 16727,
+ "eval_pred_class_1": 2941,
+ "eval_predicted_binding_ratio": 0.1495322351027049,
+ "eval_recall": 0.6646243147371815,
+ "eval_recall_macro": 0.8057533356145012,
+ "eval_runtime": 0.1796,
+ "eval_samples_per_second": 907.821,
+ "eval_steps_per_second": 5.569,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5148
+ },
+ {
+ "epoch": 199.0,
+ "eval_accuracy": 0.9023286556843604,
+ "eval_auc": 0.9271362061477199,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6822167080231596,
+ "eval_f1_macro": 0.812256712426767,
+ "eval_loss": 0.26931333541870117,
+ "eval_pr_auc": 0.6701662107301889,
+ "eval_precision": 0.7004076086956522,
+ "eval_precision_macro": 0.8191406615590195,
+ "eval_pred_class_0": 16724,
+ "eval_pred_class_1": 2944,
+ "eval_predicted_binding_ratio": 0.14968476713443157,
+ "eval_recall": 0.6649467913576266,
+ "eval_recall_macro": 0.8058542129661919,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.682,
+ "eval_steps_per_second": 3.931,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5174
+ },
+ {
+ "epoch": 200.0,
+ "eval_accuracy": 0.9019727476103315,
+ "eval_auc": 0.9271006630615285,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6823723228995058,
+ "eval_f1_macro": 0.8122076248057319,
+ "eval_loss": 0.26923447847366333,
+ "eval_pr_auc": 0.6700054234044599,
+ "eval_precision": 0.6975412596833951,
+ "eval_precision_macro": 0.8179304597716335,
+ "eval_pred_class_0": 16699,
+ "eval_pred_class_1": 2969,
+ "eval_predicted_binding_ratio": 0.15095586739882041,
+ "eval_recall": 0.6678490809416318,
+ "eval_recall_macro": 0.8068224700899382,
+ "eval_runtime": 0.2001,
+ "eval_samples_per_second": 814.446,
+ "eval_steps_per_second": 4.997,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5200
+ },
+ {
+ "epoch": 201.0,
+ "eval_accuracy": 0.9022269676632093,
+ "eval_auc": 0.9272154481542286,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6827256228345158,
+ "eval_f1_macro": 0.8124672441745833,
+ "eval_loss": 0.2691180408000946,
+ "eval_pr_auc": 0.6705128874375396,
+ "eval_precision": 0.6989864864864865,
+ "eval_precision_macro": 0.8186098340979236,
+ "eval_pred_class_0": 16708,
+ "eval_pred_class_1": 2960,
+ "eval_predicted_binding_ratio": 0.15049827130364044,
+ "eval_recall": 0.6672041277007417,
+ "eval_recall_macro": 0.8067112568243553,
+ "eval_runtime": 0.2326,
+ "eval_samples_per_second": 700.909,
+ "eval_steps_per_second": 4.3,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5226
+ },
+ {
+ "epoch": 202.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9272916730860608,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6847915636842973,
+ "eval_f1_macro": 0.8136435649304945,
+ "eval_loss": 0.2689346969127655,
+ "eval_pr_auc": 0.6709998700001464,
+ "eval_precision": 0.7001347708894878,
+ "eval_precision_macro": 0.8194386429297739,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6701064172847468,
+ "eval_recall_macro": 0.8081925820956238,
+ "eval_runtime": 0.1664,
+ "eval_samples_per_second": 979.381,
+ "eval_steps_per_second": 6.008,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5252
+ },
+ {
+ "epoch": 203.0,
+ "eval_accuracy": 0.9025828757372382,
+ "eval_auc": 0.9274415730349983,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6833057851239669,
+ "eval_f1_macro": 0.8128720237282395,
+ "eval_loss": 0.269077867269516,
+ "eval_pr_auc": 0.6716085952371595,
+ "eval_precision": 0.7009155645981688,
+ "eval_precision_macro": 0.819534880211639,
+ "eval_pred_class_0": 16719,
+ "eval_pred_class_1": 2949,
+ "eval_predicted_binding_ratio": 0.14993898718730933,
+ "eval_recall": 0.6665591744598517,
+ "eval_recall_macro": 0.8066604045173044,
+ "eval_runtime": 0.2556,
+ "eval_samples_per_second": 637.756,
+ "eval_steps_per_second": 3.913,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5278
+ },
+ {
+ "epoch": 204.0,
+ "eval_accuracy": 0.902837095790116,
+ "eval_auc": 0.9274026625041677,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6848095002474023,
+ "eval_f1_macro": 0.8136877723940104,
+ "eval_loss": 0.2691201865673065,
+ "eval_pr_auc": 0.6711363619469519,
+ "eval_precision": 0.700877785280216,
+ "eval_precision_macro": 0.8197612917781423,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6694614640438569,
+ "eval_recall_macro": 0.8079908273922429,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.544,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5304
+ },
+ {
+ "epoch": 205.0,
+ "eval_accuracy": 0.9029896278218426,
+ "eval_auc": 0.9274924350745481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6850445691647409,
+ "eval_f1_macro": 0.8138546963859644,
+ "eval_loss": 0.2691231071949005,
+ "eval_pr_auc": 0.6713794419677425,
+ "eval_precision": 0.7017247210010146,
+ "eval_precision_macro": 0.8201640180913157,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.8079503109990844,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.331,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5330
+ },
+ {
+ "epoch": 206.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9275284258556915,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6860062633921213,
+ "eval_f1_macro": 0.8143728753012186,
+ "eval_loss": 0.26890990138053894,
+ "eval_pr_auc": 0.6717808840440014,
+ "eval_precision": 0.7016183412002697,
+ "eval_precision_macro": 0.8202739053624388,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6710738471460819,
+ "eval_recall_macro": 0.8088271994226215,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.148,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5356
+ },
+ {
+ "epoch": 207.0,
+ "eval_accuracy": 0.9030913158429937,
+ "eval_auc": 0.9275997261430513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6854785478547855,
+ "eval_f1_macro": 0.8141000144010073,
+ "eval_loss": 0.2689792513847351,
+ "eval_pr_auc": 0.6718662790463032,
+ "eval_precision": 0.7019263264616424,
+ "eval_precision_macro": 0.8203209943398044,
+ "eval_pred_class_0": 16709,
+ "eval_pred_class_1": 2959,
+ "eval_predicted_binding_ratio": 0.15044742729306487,
+ "eval_recall": 0.6697839406643018,
+ "eval_recall_macro": 0.8082727876195295,
+ "eval_runtime": 0.195,
+ "eval_samples_per_second": 835.813,
+ "eval_steps_per_second": 5.128,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5382
+ },
+ {
+ "epoch": 208.0,
+ "eval_accuracy": 0.9032946918852959,
+ "eval_auc": 0.9276906860783045,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862421643022105,
+ "eval_f1_macro": 0.8145402081954642,
+ "eval_loss": 0.2688303291797638,
+ "eval_pr_auc": 0.6723694322774509,
+ "eval_precision": 0.7024653833164471,
+ "eval_precision_macro": 0.8206766373097469,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808786683029463,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.22,
+ "eval_steps_per_second": 3.848,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5408
+ },
+ {
+ "epoch": 209.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9277029684919884,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6851760039662865,
+ "eval_f1_macro": 0.8139715080669648,
+ "eval_loss": 0.2689387798309326,
+ "eval_pr_auc": 0.6722283267888528,
+ "eval_precision": 0.7027118644067797,
+ "eval_precision_macro": 0.8206106277411336,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6684940341825217,
+ "eval_recall_macro": 0.8077787367749694,
+ "eval_runtime": 0.2435,
+ "eval_samples_per_second": 669.46,
+ "eval_steps_per_second": 4.107,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5434
+ },
+ {
+ "epoch": 210.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9277600593308708,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6846370100876468,
+ "eval_f1_macro": 0.8136754097270521,
+ "eval_loss": 0.26892563700675964,
+ "eval_pr_auc": 0.6725269853749476,
+ "eval_precision": 0.7026476578411406,
+ "eval_precision_macro": 0.8204961767258567,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6675266043211867,
+ "eval_recall_macro": 0.807325202323568,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.763,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5460
+ },
+ {
+ "epoch": 211.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9278453744167288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.686128979053274,
+ "eval_f1_macro": 0.8144677293907912,
+ "eval_loss": 0.26878559589385986,
+ "eval_pr_auc": 0.6729846306066621,
+ "eval_precision": 0.7022282241728561,
+ "eval_precision_macro": 0.8205562286912407,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808756502550197,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.879,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5486
+ },
+ {
+ "epoch": 211.53846153846155,
+ "grad_norm": 16655.041015625,
+ "learning_rate": 7.144675667015729e-07,
+ "loss": 0.2259,
+ "step": 5500
+ },
+ {
+ "epoch": 212.0,
+ "eval_accuracy": 0.9028879398006915,
+ "eval_auc": 0.9279623006591996,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6845061116617113,
+ "eval_f1_macro": 0.8135588667797169,
+ "eval_loss": 0.26880088448524475,
+ "eval_pr_auc": 0.6734939312101108,
+ "eval_precision": 0.7016593294954284,
+ "eval_precision_macro": 0.8200489288817256,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6681715575620768,
+ "eval_recall_macro": 0.8074967765476829,
+ "eval_runtime": 0.2166,
+ "eval_samples_per_second": 752.433,
+ "eval_steps_per_second": 4.616,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5512
+ },
+ {
+ "epoch": 213.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9279055601902797,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6863359156090325,
+ "eval_f1_macro": 0.8145677594216373,
+ "eval_loss": 0.2687283456325531,
+ "eval_pr_auc": 0.6734074239428265,
+ "eval_precision": 0.7019554956169926,
+ "eval_precision_macro": 0.820472419105347,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6713963237665269,
+ "eval_recall_macro": 0.80901861821211,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.742,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5538
+ },
+ {
+ "epoch": 214.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9279595755594916,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6864392815949909,
+ "eval_f1_macro": 0.8146177229810407,
+ "eval_loss": 0.2687055766582489,
+ "eval_pr_auc": 0.6734235549479375,
+ "eval_precision": 0.7018194070080862,
+ "eval_precision_macro": 0.8204306615878754,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8091496760430665,
+ "eval_runtime": 0.225,
+ "eval_samples_per_second": 724.337,
+ "eval_steps_per_second": 4.444,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5564
+ },
+ {
+ "epoch": 215.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9280330753916157,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.685383980181668,
+ "eval_f1_macro": 0.8140720567955604,
+ "eval_loss": 0.2687467932701111,
+ "eval_pr_auc": 0.6737104569152422,
+ "eval_precision": 0.7024373730534867,
+ "eval_precision_macro": 0.8205258541706347,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.8080408524368825,
+ "eval_runtime": 0.1833,
+ "eval_samples_per_second": 889.114,
+ "eval_steps_per_second": 5.455,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5590
+ },
+ {
+ "epoch": 216.0,
+ "eval_accuracy": 0.9031930038641448,
+ "eval_auc": 0.9281060496687963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6860158311345647,
+ "eval_f1_macro": 0.8143952682963037,
+ "eval_loss": 0.26888203620910645,
+ "eval_pr_auc": 0.6738155361634312,
+ "eval_precision": 0.7019912251096861,
+ "eval_precision_macro": 0.8204358998939631,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808726322070931,
+ "eval_runtime": 0.1764,
+ "eval_samples_per_second": 923.938,
+ "eval_steps_per_second": 5.668,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5616
+ },
+ {
+ "epoch": 217.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9281046287239484,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6865425794761983,
+ "eval_f1_macro": 0.8146676522813128,
+ "eval_loss": 0.2686736285686493,
+ "eval_pr_auc": 0.6740322097472393,
+ "eval_precision": 0.7016835016835017,
+ "eval_precision_macro": 0.8203890020095554,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.672041277007417,
+ "eval_recall_macro": 0.809280733874023,
+ "eval_runtime": 0.2537,
+ "eval_samples_per_second": 642.37,
+ "eval_steps_per_second": 3.941,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5642
+ },
+ {
+ "epoch": 218.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9282425576991689,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862615587846763,
+ "eval_f1_macro": 0.814585106315415,
+ "eval_loss": 0.26872488856315613,
+ "eval_pr_auc": 0.6745604450622946,
+ "eval_precision": 0.7032148900169205,
+ "eval_precision_macro": 0.8210025266814094,
+ "eval_pred_class_0": 16713,
+ "eval_pred_class_1": 2955,
+ "eval_predicted_binding_ratio": 0.15024405125076265,
+ "eval_recall": 0.6701064172847468,
+ "eval_recall_macro": 0.808584928326082,
+ "eval_runtime": 0.2515,
+ "eval_samples_per_second": 648.077,
+ "eval_steps_per_second": 3.976,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5668
+ },
+ {
+ "epoch": 219.0,
+ "eval_accuracy": 0.9032946918852959,
+ "eval_auc": 0.9283728174652107,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862421643022105,
+ "eval_f1_macro": 0.8145402081954642,
+ "eval_loss": 0.26861947774887085,
+ "eval_pr_auc": 0.675175157595335,
+ "eval_precision": 0.7024653833164471,
+ "eval_precision_macro": 0.8206766373097469,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808786683029463,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 605.917,
+ "eval_steps_per_second": 3.717,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5694
+ },
+ {
+ "epoch": 220.0,
+ "eval_accuracy": 0.9035997559487492,
+ "eval_auc": 0.9283705984554486,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6875411997363217,
+ "eval_f1_macro": 0.8152747479984963,
+ "eval_loss": 0.2684967517852783,
+ "eval_pr_auc": 0.6752603675091132,
+ "eval_precision": 0.703067071115605,
+ "eval_precision_macro": 0.8211461336058236,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.809754112890798,
+ "eval_runtime": 0.2663,
+ "eval_samples_per_second": 612.018,
+ "eval_steps_per_second": 3.755,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5720
+ },
+ {
+ "epoch": 221.0,
+ "eval_accuracy": 0.9037014439699004,
+ "eval_auc": 0.9285080992007146,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6871489924017179,
+ "eval_f1_macro": 0.8151206773197821,
+ "eval_loss": 0.2685548961162567,
+ "eval_pr_auc": 0.6758164431668767,
+ "eval_precision": 0.7043684388757196,
+ "eval_precision_macro": 0.8216427895844347,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.809028126863591,
+ "eval_runtime": 0.1986,
+ "eval_samples_per_second": 820.602,
+ "eval_steps_per_second": 5.034,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5746
+ },
+ {
+ "epoch": 222.0,
+ "eval_accuracy": 0.9034980679275981,
+ "eval_auc": 0.9285004105265384,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6870052770448549,
+ "eval_f1_macro": 0.8149801571567146,
+ "eval_loss": 0.2685752809047699,
+ "eval_pr_auc": 0.6755687553750968,
+ "eval_precision": 0.7030037124535943,
+ "eval_precision_macro": 0.8210319370409247,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8093005784393965,
+ "eval_runtime": 0.2128,
+ "eval_samples_per_second": 765.976,
+ "eval_steps_per_second": 4.699,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5772
+ },
+ {
+ "epoch": 223.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9285113303903685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6871913072110636,
+ "eval_f1_macro": 0.815034532807023,
+ "eval_loss": 0.2685534358024597,
+ "eval_pr_auc": 0.675465436437485,
+ "eval_precision": 0.7019845274133871,
+ "eval_precision_macro": 0.8206238899420935,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8097644488046905,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.434,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5798
+ },
+ {
+ "epoch": 224.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9286790602773953,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.685742639761826,
+ "eval_f1_macro": 0.814334221653217,
+ "eval_loss": 0.2686038315296173,
+ "eval_pr_auc": 0.6763975611431872,
+ "eval_precision": 0.7039049235993209,
+ "eval_precision_macro": 0.8212163498580232,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6684940341825217,
+ "eval_recall_macro": 0.8079296391712996,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.051,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5824
+ },
+ {
+ "epoch": 225.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9286844910118134,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6857801944307135,
+ "eval_f1_macro": 0.8142280597608222,
+ "eval_loss": 0.268373966217041,
+ "eval_pr_auc": 0.6766849702960268,
+ "eval_precision": 0.7011455525606469,
+ "eval_precision_macro": 0.8200338541246348,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6710738471460819,
+ "eval_recall_macro": 0.8087668384640894,
+ "eval_runtime": 0.2618,
+ "eval_samples_per_second": 622.527,
+ "eval_steps_per_second": 3.819,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5850
+ },
+ {
+ "epoch": 226.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9286265437130228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6868321551865856,
+ "eval_f1_macro": 0.8147720454758898,
+ "eval_loss": 0.268480122089386,
+ "eval_pr_auc": 0.6762864798501788,
+ "eval_precision": 0.7005365526492288,
+ "eval_precision_macro": 0.8199434531195322,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8098756620702734,
+ "eval_runtime": 0.1801,
+ "eval_samples_per_second": 905.249,
+ "eval_steps_per_second": 5.554,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5876
+ },
+ {
+ "epoch": 227.0,
+ "eval_accuracy": 0.9034472239170226,
+ "eval_auc": 0.9286911091111043,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6878185106033208,
+ "eval_f1_macro": 0.8153554406082493,
+ "eval_loss": 0.2684793770313263,
+ "eval_pr_auc": 0.6763719243742072,
+ "eval_precision": 0.7015425888665325,
+ "eval_precision_macro": 0.8205363669491479,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.810449918438739,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.327,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5902
+ },
+ {
+ "epoch": 228.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9287447935753516,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6870580496628844,
+ "eval_f1_macro": 0.8149167860703537,
+ "eval_loss": 0.2684626877307892,
+ "eval_pr_auc": 0.6765555444970285,
+ "eval_precision": 0.701006711409396,
+ "eval_precision_macro": 0.8201821668264622,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8099360230288054,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.184,
+ "eval_steps_per_second": 3.952,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5928
+ },
+ {
+ "epoch": 229.0,
+ "eval_accuracy": 0.9037522879804759,
+ "eval_auc": 0.92885044958403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6877783275606135,
+ "eval_f1_macro": 0.815442675636767,
+ "eval_loss": 0.2684047222137451,
+ "eval_pr_auc": 0.6771563538797724,
+ "eval_precision": 0.7039162727886563,
+ "eval_precision_macro": 0.8215498998326138,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6723637536278619,
+ "eval_recall_macro": 0.8097135964976395,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.352,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5954
+ },
+ {
+ "epoch": 230.0,
+ "eval_accuracy": 0.9031930038641448,
+ "eval_auc": 0.9288052907888691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6866359447004609,
+ "eval_f1_macro": 0.814695001815053,
+ "eval_loss": 0.2683703601360321,
+ "eval_pr_auc": 0.6770692308922137,
+ "eval_precision": 0.7011764705882353,
+ "eval_precision_macro": 0.8201862703986524,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.80951266905667,
+ "eval_runtime": 0.1803,
+ "eval_samples_per_second": 903.896,
+ "eval_steps_per_second": 5.545,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5980
+ },
+ {
+ "epoch": 230.76923076923077,
+ "grad_norm": 18226.349609375,
+ "learning_rate": 6.520804793983146e-07,
+ "loss": 0.2213,
+ "step": 6000
+ },
+ {
+ "epoch": 231.0,
+ "eval_accuracy": 0.9036505999593248,
+ "eval_auc": 0.9289028104284194,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881685041961494,
+ "eval_f1_macro": 0.8155957226774667,
+ "eval_loss": 0.26838722825050354,
+ "eval_pr_auc": 0.677382862396771,
+ "eval_precision": 0.7026209677419355,
+ "eval_precision_macro": 0.821056469972094,
+ "eval_pred_class_0": 16692,
+ "eval_pred_class_1": 2976,
+ "eval_predicted_binding_ratio": 0.1513117754728493,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8104395825248465,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.676,
+ "eval_steps_per_second": 4.047,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6006
+ },
+ {
+ "epoch": 232.0,
+ "eval_accuracy": 0.9036505999593248,
+ "eval_auc": 0.9289413705892875,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882710972199375,
+ "eval_f1_macro": 0.8156453059542872,
+ "eval_loss": 0.2681979238986969,
+ "eval_pr_auc": 0.6777962434595076,
+ "eval_precision": 0.7024848891873741,
+ "eval_precision_macro": 0.8210147633474318,
+ "eval_pred_class_0": 16690,
+ "eval_pred_class_1": 2978,
+ "eval_predicted_binding_ratio": 0.1514134634940004,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.8105706403558031,
+ "eval_runtime": 0.2338,
+ "eval_samples_per_second": 697.187,
+ "eval_steps_per_second": 4.277,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6032
+ },
+ {
+ "epoch": 233.0,
+ "eval_accuracy": 0.9039556640227782,
+ "eval_auc": 0.9290857424788171,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6879233437964646,
+ "eval_f1_macro": 0.8155838213438953,
+ "eval_loss": 0.2683457136154175,
+ "eval_pr_auc": 0.6782535995592411,
+ "eval_precision": 0.7052845528455285,
+ "eval_precision_macro": 0.8221624965711252,
+ "eval_pred_class_0": 16716,
+ "eval_pred_class_1": 2952,
+ "eval_predicted_binding_ratio": 0.150091519219036,
+ "eval_recall": 0.6713963237665269,
+ "eval_recall_macro": 0.8094411449218342,
+ "eval_runtime": 0.2259,
+ "eval_samples_per_second": 721.682,
+ "eval_steps_per_second": 4.427,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6058
+ },
+ {
+ "epoch": 234.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.9291390181781085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881400726792204,
+ "eval_f1_macro": 0.8157063562723066,
+ "eval_loss": 0.26850852370262146,
+ "eval_pr_auc": 0.678201898193761,
+ "eval_precision": 0.7053843548933288,
+ "eval_precision_macro": 0.8222404873479507,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8096023832320567,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.187,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6084
+ },
+ {
+ "epoch": 235.0,
+ "eval_accuracy": 0.9037014439699004,
+ "eval_auc": 0.9291422688327602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.687767886580943,
+ "eval_f1_macro": 0.8154198615351363,
+ "eval_loss": 0.26855188608169556,
+ "eval_pr_auc": 0.6780321638936206,
+ "eval_precision": 0.7035413153456999,
+ "eval_precision_macro": 0.8213868942770528,
+ "eval_pred_class_0": 16703,
+ "eval_pred_class_1": 2965,
+ "eval_predicted_binding_ratio": 0.1507524913565182,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.80981447384933,
+ "eval_runtime": 0.1711,
+ "eval_samples_per_second": 952.835,
+ "eval_steps_per_second": 5.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6110
+ },
+ {
+ "epoch": 236.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9292183185796111,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881081972620815,
+ "eval_f1_macro": 0.8156376648859622,
+ "eval_loss": 0.2684246003627777,
+ "eval_pr_auc": 0.6783782374353945,
+ "eval_precision": 0.7042538825118163,
+ "eval_precision_macro": 0.8217486340608884,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.8099050152871281,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.985,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6136
+ },
+ {
+ "epoch": 237.0,
+ "eval_accuracy": 0.9039048200122025,
+ "eval_auc": 0.9291723227895398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891447368421053,
+ "eval_f1_macro": 0.8161564434751782,
+ "eval_loss": 0.2682003080844879,
+ "eval_pr_auc": 0.678474943261759,
+ "eval_precision": 0.7032561262168513,
+ "eval_precision_macro": 0.8214884501897367,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6755885198323122,
+ "eval_recall_macro": 0.8111147162450025,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.44,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6162
+ },
+ {
+ "epoch": 238.0,
+ "eval_accuracy": 0.9041081960545048,
+ "eval_auc": 0.9292599542101496,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891891891891891,
+ "eval_f1_macro": 0.8162490373023017,
+ "eval_loss": 0.2683660686016083,
+ "eval_pr_auc": 0.6787382085049865,
+ "eval_precision": 0.704752275025278,
+ "eval_precision_macro": 0.8221384271958916,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8107112068382407,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.396,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6188
+ },
+ {
+ "epoch": 239.0,
+ "eval_accuracy": 0.9039556640227782,
+ "eval_auc": 0.9292687329242089,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6886434811274106,
+ "eval_f1_macro": 0.8159319482645679,
+ "eval_loss": 0.2683408558368683,
+ "eval_pr_auc": 0.6788700814485856,
+ "eval_precision": 0.7043155765340526,
+ "eval_precision_macro": 0.8218620153057044,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8103585497385295,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.65,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6214
+ },
+ {
+ "epoch": 240.0,
+ "eval_accuracy": 0.9038031319910514,
+ "eval_auc": 0.9293738244479479,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6886109282422647,
+ "eval_f1_macro": 0.8158628904590758,
+ "eval_loss": 0.26828694343566895,
+ "eval_pr_auc": 0.6792947923537326,
+ "eval_precision": 0.7031932773109244,
+ "eval_precision_macro": 0.8213743898086402,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.8106611817936011,
+ "eval_runtime": 0.1692,
+ "eval_samples_per_second": 963.308,
+ "eval_steps_per_second": 5.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6240
+ },
+ {
+ "epoch": 241.0,
+ "eval_accuracy": 0.9039048200122025,
+ "eval_auc": 0.929388987681323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891447368421053,
+ "eval_f1_macro": 0.8161564434751782,
+ "eval_loss": 0.26832982897758484,
+ "eval_pr_auc": 0.679377997729221,
+ "eval_precision": 0.7032561262168513,
+ "eval_precision_macro": 0.8214884501897367,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6755885198323122,
+ "eval_recall_macro": 0.8111147162450025,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.159,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6266
+ },
+ {
+ "epoch": 242.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.9294373582011398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891669410602568,
+ "eval_f1_macro": 0.8162027357577154,
+ "eval_loss": 0.26806166768074036,
+ "eval_pr_auc": 0.6798375856033828,
+ "eval_precision": 0.7040026908846283,
+ "eval_precision_macro": 0.8218126661970311,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6749435665914221,
+ "eval_recall_macro": 0.8109129615416215,
+ "eval_runtime": 0.2508,
+ "eval_samples_per_second": 649.948,
+ "eval_steps_per_second": 3.987,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6292
+ },
+ {
+ "epoch": 243.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.929443917905437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6897798225435425,
+ "eval_f1_macro": 0.8164989338281623,
+ "eval_loss": 0.26814863085746765,
+ "eval_pr_auc": 0.6797263827568155,
+ "eval_precision": 0.7031825795644892,
+ "eval_precision_macro": 0.8215607197408852,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6768784263140922,
+ "eval_recall_macro": 0.8116993085273606,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 642.959,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6318
+ },
+ {
+ "epoch": 244.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9295684160320964,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882110469909316,
+ "eval_f1_macro": 0.8156873815700654,
+ "eval_loss": 0.26816216111183167,
+ "eval_pr_auc": 0.6803755613590039,
+ "eval_precision": 0.7041160593792173,
+ "eval_precision_macro": 0.8217060181953557,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8100360731180846,
+ "eval_runtime": 0.2375,
+ "eval_samples_per_second": 686.328,
+ "eval_steps_per_second": 4.211,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6344
+ },
+ {
+ "epoch": 245.0,
+ "eval_accuracy": 0.9041081960545048,
+ "eval_auc": 0.9295755402213329,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6888815572418344,
+ "eval_f1_macro": 0.8161003326270482,
+ "eval_loss": 0.2682454288005829,
+ "eval_pr_auc": 0.6803390373338819,
+ "eval_precision": 0.7051671732522796,
+ "eval_precision_macro": 0.8222669528798059,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6733311834891971,
+ "eval_recall_macro": 0.8103180333453712,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.048,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6370
+ },
+ {
+ "epoch": 246.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9296204070415253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882110469909316,
+ "eval_f1_macro": 0.8156873815700654,
+ "eval_loss": 0.2682053744792938,
+ "eval_pr_auc": 0.6804463740899893,
+ "eval_precision": 0.7041160593792173,
+ "eval_precision_macro": 0.8217060181953557,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8100360731180846,
+ "eval_runtime": 0.2411,
+ "eval_samples_per_second": 676.2,
+ "eval_steps_per_second": 4.148,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6396
+ },
+ {
+ "epoch": 247.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9296705294111545,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909270541742137,
+ "eval_f1_macro": 0.817248994423186,
+ "eval_loss": 0.2681281566619873,
+ "eval_pr_auc": 0.6807214505356617,
+ "eval_precision": 0.7059219380888291,
+ "eval_precision_macro": 0.8229238344013863,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6765559496936472,
+ "eval_recall_macro": 0.8119002359683303,
+ "eval_runtime": 0.1963,
+ "eval_samples_per_second": 830.371,
+ "eval_steps_per_second": 5.094,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6422
+ },
+ {
+ "epoch": 248.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9296871135893773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912442396313364,
+ "eval_f1_macro": 0.8174200753177728,
+ "eval_loss": 0.26824310421943665,
+ "eval_pr_auc": 0.6805326752113899,
+ "eval_precision": 0.7058823529411765,
+ "eval_precision_macro": 0.8229585490219571,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.8121925321095093,
+ "eval_runtime": 0.2645,
+ "eval_samples_per_second": 616.162,
+ "eval_steps_per_second": 3.78,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6448
+ },
+ {
+ "epoch": 249.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.929768866580617,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6914841047603361,
+ "eval_f1_macro": 0.8175893393183914,
+ "eval_loss": 0.2681940495967865,
+ "eval_pr_auc": 0.6810052289277716,
+ "eval_precision": 0.7067340067340068,
+ "eval_precision_macro": 0.8233634101223034,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6768784263140922,
+ "eval_recall_macro": 0.8121520157163508,
+ "eval_runtime": 0.2396,
+ "eval_samples_per_second": 680.397,
+ "eval_steps_per_second": 4.174,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6474
+ },
+ {
+ "epoch": 250.0,
+ "grad_norm": 35924.55078125,
+ "learning_rate": 5.869563021464528e-07,
+ "loss": 0.2171,
+ "step": 6500
+ },
+ {
+ "epoch": 250.0,
+ "eval_accuracy": 0.9048200122025626,
+ "eval_auc": 0.9298367215633461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912928759894459,
+ "eval_f1_macro": 0.817514675551828,
+ "eval_loss": 0.2681121826171875,
+ "eval_pr_auc": 0.681415067318076,
+ "eval_precision": 0.7073911576105298,
+ "eval_precision_macro": 0.8236147646777582,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8117890227027473,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.453,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6500
+ },
+ {
+ "epoch": 251.0,
+ "eval_accuracy": 0.9046674801708359,
+ "eval_auc": 0.9298684105799504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909510466457887,
+ "eval_f1_macro": 0.8172961371074986,
+ "eval_loss": 0.2681705951690674,
+ "eval_pr_auc": 0.6814913977659953,
+ "eval_precision": 0.7066756574511126,
+ "eval_precision_macro": 0.8232516115060616,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8116984812649493,
+ "eval_runtime": 0.271,
+ "eval_samples_per_second": 601.566,
+ "eval_steps_per_second": 3.691,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6526
+ },
+ {
+ "epoch": 252.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9300244614682288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6893926857521099,
+ "eval_f1_macro": 0.8165072340543806,
+ "eval_loss": 0.2683667540550232,
+ "eval_pr_auc": 0.6820860933741758,
+ "eval_precision": 0.7080217539089055,
+ "eval_precision_macro": 0.8235792136757251,
+ "eval_pred_class_0": 16726,
+ "eval_pred_class_1": 2942,
+ "eval_predicted_binding_ratio": 0.14958307911328045,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8099343685039828,
+ "eval_runtime": 0.2316,
+ "eval_samples_per_second": 703.782,
+ "eval_steps_per_second": 4.318,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6552
+ },
+ {
+ "epoch": 253.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9299541441632636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6910408432147562,
+ "eval_f1_macro": 0.8173217684087248,
+ "eval_loss": 0.26824095845222473,
+ "eval_pr_auc": 0.6816317339832768,
+ "eval_precision": 0.7061595422416694,
+ "eval_precision_macro": 0.8230444354317887,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6765559496936472,
+ "eval_recall_macro": 0.8119304164475962,
+ "eval_runtime": 0.2652,
+ "eval_samples_per_second": 614.632,
+ "eval_steps_per_second": 3.771,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6578
+ },
+ {
+ "epoch": 254.0,
+ "eval_accuracy": 0.9044641041285336,
+ "eval_auc": 0.9299257349988078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6913093477903729,
+ "eval_f1_macro": 0.8173981849782266,
+ "eval_loss": 0.2680823504924774,
+ "eval_pr_auc": 0.6815799614400636,
+ "eval_precision": 0.7046215673141326,
+ "eval_precision_macro": 0.8224282755645115,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6784908094163173,
+ "eval_recall_macro": 0.8126262219955371,
+ "eval_runtime": 0.1721,
+ "eval_samples_per_second": 947.131,
+ "eval_steps_per_second": 5.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6604
+ },
+ {
+ "epoch": 255.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9300487927156216,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6902129064202014,
+ "eval_f1_macro": 0.8169037907104764,
+ "eval_loss": 0.2682023346424103,
+ "eval_pr_auc": 0.6819970017971004,
+ "eval_precision": 0.7068965517241379,
+ "eval_precision_macro": 0.8232268515652408,
+ "eval_pred_class_0": 16710,
+ "eval_pred_class_1": 2958,
+ "eval_predicted_binding_ratio": 0.1503965832824893,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8109828311516347,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.732,
+ "eval_steps_per_second": 3.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6630
+ },
+ {
+ "epoch": 256.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9300701263533355,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6908371786420567,
+ "eval_f1_macro": 0.8172233266061071,
+ "eval_loss": 0.26801708340644836,
+ "eval_pr_auc": 0.6822973012887885,
+ "eval_precision": 0.7064374789349511,
+ "eval_precision_macro": 0.8231307207859595,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8116683007856833,
+ "eval_runtime": 0.2302,
+ "eval_samples_per_second": 708.13,
+ "eval_steps_per_second": 4.344,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6656
+ },
+ {
+ "epoch": 257.0,
+ "eval_accuracy": 0.9048708562131381,
+ "eval_auc": 0.9301923470752391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6911012052171042,
+ "eval_f1_macro": 0.8174397819709127,
+ "eval_loss": 0.26805296540260315,
+ "eval_pr_auc": 0.6828138126269635,
+ "eval_precision": 0.7080514208389715,
+ "eval_precision_macro": 0.8238677400987582,
+ "eval_pred_class_0": 16712,
+ "eval_pred_class_1": 2956,
+ "eval_predicted_binding_ratio": 0.1502948952613382,
+ "eval_recall": 0.6749435665914221,
+ "eval_recall_macro": 0.8114260296891438,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.189,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6682
+ },
+ {
+ "epoch": 258.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9301935928351055,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912806988626998,
+ "eval_f1_macro": 0.8174910212279173,
+ "eval_loss": 0.26811105012893677,
+ "eval_pr_auc": 0.6827059930276215,
+ "eval_precision": 0.7070128118678355,
+ "eval_precision_macro": 0.8234501252489699,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6762334730732021,
+ "eval_recall_macro": 0.8118899000544377,
+ "eval_runtime": 0.2455,
+ "eval_samples_per_second": 663.913,
+ "eval_steps_per_second": 4.073,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6708
+ },
+ {
+ "epoch": 259.0,
+ "eval_accuracy": 0.9049217002237137,
+ "eval_auc": 0.9302954726341887,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6913172664245626,
+ "eval_f1_macro": 0.817561992789179,
+ "eval_loss": 0.2680983245372772,
+ "eval_pr_auc": 0.6832649652047296,
+ "eval_precision": 0.7081501521812648,
+ "eval_precision_macro": 0.8239452215038332,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6752660432118671,
+ "eval_recall_macro": 0.8115872679993663,
+ "eval_runtime": 0.2456,
+ "eval_samples_per_second": 663.63,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6734
+ },
+ {
+ "epoch": 260.0,
+ "eval_accuracy": 0.9048200122025626,
+ "eval_auc": 0.9302687082620565,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6920039486673247,
+ "eval_f1_macro": 0.8178583697873878,
+ "eval_loss": 0.2680213451385498,
+ "eval_pr_auc": 0.6832868902825516,
+ "eval_precision": 0.7064158548874706,
+ "eval_precision_macro": 0.8233115761166728,
+ "eval_pred_class_0": 16691,
+ "eval_pred_class_1": 2977,
+ "eval_predicted_binding_ratio": 0.15136261948342486,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8127064275194428,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.797,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6760
+ },
+ {
+ "epoch": 261.0,
+ "eval_accuracy": 0.9049725442342892,
+ "eval_auc": 0.9303376240871719,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.691635043722158,
+ "eval_f1_macro": 0.817733374603378,
+ "eval_loss": 0.2681059241294861,
+ "eval_pr_auc": 0.6834436649713198,
+ "eval_precision": 0.7081081081081081,
+ "eval_precision_macro": 0.8239786410782342,
+ "eval_pred_class_0": 16708,
+ "eval_pred_class_1": 2960,
+ "eval_predicted_binding_ratio": 0.15049827130364044,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8118795641405453,
+ "eval_runtime": 0.1861,
+ "eval_samples_per_second": 875.956,
+ "eval_steps_per_second": 5.374,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6786
+ },
+ {
+ "epoch": 262.0,
+ "eval_accuracy": 0.9049725442342892,
+ "eval_auc": 0.9303520963131211,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6920415224913494,
+ "eval_f1_macro": 0.8179298603528982,
+ "eval_loss": 0.2680259048938751,
+ "eval_pr_auc": 0.683712511498021,
+ "eval_precision": 0.7075471698113207,
+ "eval_precision_macro": 0.8238035250254208,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.8124037954643712,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.771,
+ "eval_steps_per_second": 3.931,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6812
+ },
+ {
+ "epoch": 263.0,
+ "eval_accuracy": 0.9051759202765914,
+ "eval_auc": 0.9303956005834594,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.692700609655627,
+ "eval_f1_macro": 0.8183195235731167,
+ "eval_loss": 0.2679544985294342,
+ "eval_pr_auc": 0.6840791766505604,
+ "eval_precision": 0.7082210242587601,
+ "eval_precision_macro": 0.8242003324886615,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6778458561754273,
+ "eval_recall_macro": 0.8127866330433483,
+ "eval_runtime": 0.224,
+ "eval_samples_per_second": 727.59,
+ "eval_steps_per_second": 4.464,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6838
+ },
+ {
+ "epoch": 264.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9304257713302264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933465085638999,
+ "eval_f1_macro": 0.8186850387937344,
+ "eval_loss": 0.26790833473205566,
+ "eval_pr_auc": 0.6843453300290927,
+ "eval_precision": 0.70851565129586,
+ "eval_precision_macro": 0.8244321084532245,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.8132703479740159,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.122,
+ "eval_steps_per_second": 3.866,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6864
+ },
+ {
+ "epoch": 265.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9304709495903853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6934474810668423,
+ "eval_f1_macro": 0.8187338421508825,
+ "eval_loss": 0.26806485652923584,
+ "eval_pr_auc": 0.684236710788699,
+ "eval_precision": 0.7083753784056509,
+ "eval_precision_macro": 0.8243883480827296,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6791357626572073,
+ "eval_recall_macro": 0.8134014058049723,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 646.986,
+ "eval_steps_per_second": 3.969,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6890
+ },
+ {
+ "epoch": 266.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9305072226139984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6937767533750412,
+ "eval_f1_macro": 0.8189285426426647,
+ "eval_loss": 0.2682030200958252,
+ "eval_pr_auc": 0.6841419811140891,
+ "eval_precision": 0.708711738984191,
+ "eval_precision_macro": 0.8245864774585525,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6794582392776524,
+ "eval_recall_macro": 0.8135928245944608,
+ "eval_runtime": 0.1928,
+ "eval_samples_per_second": 845.449,
+ "eval_steps_per_second": 5.187,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6916
+ },
+ {
+ "epoch": 267.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9304825799266392,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6942937016938003,
+ "eval_f1_macro": 0.8191961667392471,
+ "eval_loss": 0.2681148052215576,
+ "eval_pr_auc": 0.6840537331785722,
+ "eval_precision": 0.7083892617449664,
+ "eval_precision_macro": 0.824532598274209,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6807481457594324,
+ "eval_recall_macro": 0.8141472363975528,
+ "eval_runtime": 0.2454,
+ "eval_samples_per_second": 664.157,
+ "eval_steps_per_second": 4.075,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6942
+ },
+ {
+ "epoch": 268.0,
+ "eval_accuracy": 0.9057860484034981,
+ "eval_auc": 0.930527787384295,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6951801283105774,
+ "eval_f1_macro": 0.8197312675109731,
+ "eval_loss": 0.2679014503955841,
+ "eval_pr_auc": 0.6848281199608002,
+ "eval_precision": 0.7095366017461383,
+ "eval_precision_macro": 0.8251697388598875,
+ "eval_pred_class_0": 16690,
+ "eval_pred_class_1": 2978,
+ "eval_predicted_binding_ratio": 0.1514134634940004,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.8145904349350619,
+ "eval_runtime": 0.2174,
+ "eval_samples_per_second": 749.726,
+ "eval_steps_per_second": 4.6,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6968
+ },
+ {
+ "epoch": 269.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9305513205667733,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6949375410913873,
+ "eval_f1_macro": 0.8195606747920547,
+ "eval_loss": 0.26782992482185364,
+ "eval_pr_auc": 0.6850252128050689,
+ "eval_precision": 0.7086825343613812,
+ "eval_precision_macro": 0.8247638023919581,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8146309513282204,
+ "eval_runtime": 0.2304,
+ "eval_samples_per_second": 707.619,
+ "eval_steps_per_second": 4.341,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6994
+ },
+ {
+ "epoch": 269.2307692307692,
+ "grad_norm": 17604.1328125,
+ "learning_rate": 5.202671165416819e-07,
+ "loss": 0.2132,
+ "step": 7000
+ },
+ {
+ "epoch": 270.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9305936180072407,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6950377916529741,
+ "eval_f1_macro": 0.8196091213903969,
+ "eval_loss": 0.2680298984050751,
+ "eval_pr_auc": 0.6846741481205671,
+ "eval_precision": 0.7085427135678392,
+ "eval_precision_macro": 0.8247203168031008,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.814762009159177,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.148,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7020
+ },
+ {
+ "epoch": 271.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9306496966662317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6938909929194796,
+ "eval_f1_macro": 0.8190015347004277,
+ "eval_loss": 0.2681294083595276,
+ "eval_pr_auc": 0.6847333752342389,
+ "eval_precision": 0.7089502018842531,
+ "eval_precision_macro": 0.8247074919339809,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6794582392776524,
+ "eval_recall_macro": 0.8136230050737269,
+ "eval_runtime": 0.1821,
+ "eval_samples_per_second": 895.335,
+ "eval_steps_per_second": 5.493,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7046
+ },
+ {
+ "epoch": 272.0,
+ "eval_accuracy": 0.9053792963188937,
+ "eval_auc": 0.9306815900653141,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6932586121641668,
+ "eval_f1_macro": 0.8186603259504293,
+ "eval_loss": 0.26823949813842773,
+ "eval_pr_auc": 0.6847385438827486,
+ "eval_precision": 0.7090357383681726,
+ "eval_precision_macro": 0.8246412077064189,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8130384127913689,
+ "eval_runtime": 0.1793,
+ "eval_samples_per_second": 909.129,
+ "eval_steps_per_second": 5.577,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7072
+ },
+ {
+ "epoch": 273.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9307212597310633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6931443638760711,
+ "eval_f1_macro": 0.8185873316314347,
+ "eval_loss": 0.2681174576282501,
+ "eval_pr_auc": 0.6852124592316542,
+ "eval_precision": 0.7087967644084934,
+ "eval_precision_macro": 0.8245199318120546,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8130082323121028,
+ "eval_runtime": 0.1693,
+ "eval_samples_per_second": 962.904,
+ "eval_steps_per_second": 5.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7098
+ },
+ {
+ "epoch": 274.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9307878592214269,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6947368421052632,
+ "eval_f1_macro": 0.8194636820581644,
+ "eval_loss": 0.2679351270198822,
+ "eval_pr_auc": 0.685659065605018,
+ "eval_precision": 0.7089627391742196,
+ "eval_precision_macro": 0.8248510741829513,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6810706223798775,
+ "eval_recall_macro": 0.8143688356663075,
+ "eval_runtime": 0.2474,
+ "eval_samples_per_second": 658.933,
+ "eval_steps_per_second": 4.043,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7124
+ },
+ {
+ "epoch": 275.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9308354511413273,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6945496459739832,
+ "eval_f1_macro": 0.8193909880953703,
+ "eval_loss": 0.2679973542690277,
+ "eval_pr_auc": 0.6858486839853987,
+ "eval_precision": 0.7096231493943472,
+ "eval_precision_macro": 0.8251038602745574,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.8140058426527039,
+ "eval_runtime": 0.1976,
+ "eval_samples_per_second": 825.097,
+ "eval_steps_per_second": 5.062,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7150
+ },
+ {
+ "epoch": 276.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9308506727696961,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.694320987654321,
+ "eval_f1_macro": 0.819244917025501,
+ "eval_loss": 0.26807495951652527,
+ "eval_pr_auc": 0.6856600891550617,
+ "eval_precision": 0.7091459314055144,
+ "eval_precision_macro": 0.8248616921913159,
+ "eval_pred_class_0": 16694,
+ "eval_pred_class_1": 2974,
+ "eval_predicted_binding_ratio": 0.1512100874516982,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.8139454816941719,
+ "eval_runtime": 0.2211,
+ "eval_samples_per_second": 737.174,
+ "eval_steps_per_second": 4.523,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7176
+ },
+ {
+ "epoch": 277.0,
+ "eval_accuracy": 0.9057352043929225,
+ "eval_auc": 0.9308575828439557,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6946640316205533,
+ "eval_f1_macro": 0.8194640504423113,
+ "eval_loss": 0.26810142397880554,
+ "eval_pr_auc": 0.6855507532370865,
+ "eval_precision": 0.709861999326826,
+ "eval_precision_macro": 0.8252250644654733,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.81403602313197,
+ "eval_runtime": 0.2072,
+ "eval_samples_per_second": 786.583,
+ "eval_steps_per_second": 4.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7202
+ },
+ {
+ "epoch": 278.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9309145374278527,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6941467436108821,
+ "eval_f1_macro": 0.8191962415719043,
+ "eval_loss": 0.26818612217903137,
+ "eval_pr_auc": 0.6856281742604067,
+ "eval_precision": 0.7101889338731444,
+ "eval_precision_macro": 0.8252812485457677,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.813481611328878,
+ "eval_runtime": 0.1872,
+ "eval_samples_per_second": 870.561,
+ "eval_steps_per_second": 5.341,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7228
+ },
+ {
+ "epoch": 279.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9309855846702396,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.69433465085639,
+ "eval_f1_macro": 0.8192692975301671,
+ "eval_loss": 0.2680346667766571,
+ "eval_pr_auc": 0.686232632159634,
+ "eval_precision": 0.7095254123190845,
+ "eval_precision_macro": 0.825026825462411,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6797807158980974,
+ "eval_recall_macro": 0.8138446043424814,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.212,
+ "eval_steps_per_second": 4.149,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7254
+ },
+ {
+ "epoch": 280.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.930922634866985,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6957377049180328,
+ "eval_f1_macro": 0.8199473215888755,
+ "eval_loss": 0.26806843280792236,
+ "eval_pr_auc": 0.6857403383581059,
+ "eval_precision": 0.70756918972991,
+ "eval_precision_macro": 0.8244187060893835,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8156794139758723,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.751,
+ "eval_steps_per_second": 5.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7280
+ },
+ {
+ "epoch": 281.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9309180411274773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6959227116423776,
+ "eval_f1_macro": 0.8200188959217034,
+ "eval_loss": 0.2680058181285858,
+ "eval_pr_auc": 0.6857513786045211,
+ "eval_precision": 0.7069194943446441,
+ "eval_precision_macro": 0.8241715464761271,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8160424069894758,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.248,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7306
+ },
+ {
+ "epoch": 282.0,
+ "eval_accuracy": 0.9055318283506203,
+ "eval_auc": 0.930996368279084,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6950098489822718,
+ "eval_f1_macro": 0.8195600321797414,
+ "eval_loss": 0.2679016888141632,
+ "eval_pr_auc": 0.6864832067617813,
+ "eval_precision": 0.7077900367769977,
+ "eval_precision_macro": 0.824393309448042,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.814963763862558,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.664,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7332
+ },
+ {
+ "epoch": 283.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9310520771031147,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695152013147083,
+ "eval_f1_macro": 0.8196821086456596,
+ "eval_loss": 0.2679460644721985,
+ "eval_pr_auc": 0.6867185372000254,
+ "eval_precision": 0.7087801608579088,
+ "eval_precision_macro": 0.8248408116684653,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.8147921896384429,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.949,
+ "eval_steps_per_second": 4.994,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7358
+ },
+ {
+ "epoch": 284.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.931065877786636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695152013147083,
+ "eval_f1_macro": 0.8196821086456596,
+ "eval_loss": 0.26795604825019836,
+ "eval_pr_auc": 0.6868264245068394,
+ "eval_precision": 0.7087801608579088,
+ "eval_precision_macro": 0.8248408116684653,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.8147921896384429,
+ "eval_runtime": 0.2632,
+ "eval_samples_per_second": 619.283,
+ "eval_steps_per_second": 3.799,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7384
+ },
+ {
+ "epoch": 285.0,
+ "eval_accuracy": 0.9057352043929225,
+ "eval_auc": 0.9310428312291054,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696165191740413,
+ "eval_f1_macro": 0.8201894743681304,
+ "eval_loss": 0.2679717540740967,
+ "eval_pr_auc": 0.6865300625458848,
+ "eval_precision": 0.7077640786404532,
+ "eval_precision_macro": 0.8245726255085029,
+ "eval_pred_class_0": 16667,
+ "eval_pred_class_1": 3001,
+ "eval_predicted_binding_ratio": 0.15258287573723817,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8160018905963173,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.133,
+ "eval_steps_per_second": 4.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7410
+ },
+ {
+ "epoch": 286.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9311142093764568,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6946815495732108,
+ "eval_f1_macro": 0.8193658018591599,
+ "eval_loss": 0.2679450213909149,
+ "eval_pr_auc": 0.6869273130451355,
+ "eval_precision": 0.7074557004346372,
+ "eval_precision_macro": 0.8241961598653369,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8147723450730694,
+ "eval_runtime": 0.1793,
+ "eval_samples_per_second": 908.844,
+ "eval_steps_per_second": 5.576,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7436
+ },
+ {
+ "epoch": 287.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9311615190538873,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695695037791653,
+ "eval_f1_macro": 0.8199978948356761,
+ "eval_loss": 0.2679760158061981,
+ "eval_pr_auc": 0.6872164392300586,
+ "eval_precision": 0.7092127303182579,
+ "eval_precision_macro": 0.8251152664358777,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.8151448467381539,
+ "eval_runtime": 0.1776,
+ "eval_samples_per_second": 917.947,
+ "eval_steps_per_second": 5.632,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7462
+ },
+ {
+ "epoch": 288.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9311948528628156,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6959237343852729,
+ "eval_f1_macro": 0.8201439915761322,
+ "eval_loss": 0.26804664731025696,
+ "eval_pr_auc": 0.6873402512916988,
+ "eval_precision": 0.7096882333221589,
+ "eval_precision_macro": 0.8253565529811274,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.815205207696686,
+ "eval_runtime": 0.2679,
+ "eval_samples_per_second": 608.503,
+ "eval_steps_per_second": 3.733,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7488
+ },
+ {
+ "epoch": 288.46153846153845,
+ "grad_norm": 18250.5078125,
+ "learning_rate": 4.5321317063898914e-07,
+ "loss": 0.2101,
+ "step": 7500
+ },
+ {
+ "epoch": 289.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9312627857055362,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6958949096880132,
+ "eval_f1_macro": 0.8200944800500465,
+ "eval_loss": 0.26791396737098694,
+ "eval_pr_auc": 0.6879250403674073,
+ "eval_precision": 0.7089327534292406,
+ "eval_precision_macro": 0.8250281609942534,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.8154069624000669,
+ "eval_runtime": 0.2277,
+ "eval_samples_per_second": 715.925,
+ "eval_steps_per_second": 4.392,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7514
+ },
+ {
+ "epoch": 290.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9312241087546806,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696522309711286,
+ "eval_f1_macro": 0.8204332366847645,
+ "eval_loss": 0.26787513494491577,
+ "eval_pr_auc": 0.6878638814996979,
+ "eval_precision": 0.7088480801335559,
+ "eval_precision_macro": 0.8250951850316913,
+ "eval_pred_class_0": 16673,
+ "eval_pred_class_1": 2995,
+ "eval_predicted_binding_ratio": 0.15227781167378482,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.815991554682425,
+ "eval_runtime": 0.2212,
+ "eval_samples_per_second": 736.836,
+ "eval_steps_per_second": 4.52,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7540
+ },
+ {
+ "epoch": 291.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9312868249779602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975369458128079,
+ "eval_f1_macro": 0.8210658921448086,
+ "eval_loss": 0.26807519793510437,
+ "eval_pr_auc": 0.6876156626538744,
+ "eval_precision": 0.7106055536968886,
+ "eval_precision_macro": 0.8260144502101566,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8163640563475095,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.522,
+ "eval_steps_per_second": 3.874,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7566
+ },
+ {
+ "epoch": 292.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9313246454689077,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971935007385525,
+ "eval_f1_macro": 0.8208465473190101,
+ "eval_loss": 0.26810526847839355,
+ "eval_pr_auc": 0.6877564948921628,
+ "eval_precision": 0.7098930481283422,
+ "eval_precision_macro": 0.8256529284776994,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8162735149097116,
+ "eval_runtime": 0.206,
+ "eval_samples_per_second": 791.322,
+ "eval_steps_per_second": 4.855,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7592
+ },
+ {
+ "epoch": 293.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9313187962370344,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6969647251845775,
+ "eval_f1_macro": 0.8207004065741184,
+ "eval_loss": 0.26795074343681335,
+ "eval_pr_auc": 0.6879453119558532,
+ "eval_precision": 0.7094188376753507,
+ "eval_precision_macro": 0.8254123095657551,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8162131539511794,
+ "eval_runtime": 0.2264,
+ "eval_samples_per_second": 719.954,
+ "eval_steps_per_second": 4.417,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7618
+ },
+ {
+ "epoch": 294.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9313842959550158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966809069996713,
+ "eval_f1_macro": 0.820581055003595,
+ "eval_loss": 0.2678394019603729,
+ "eval_pr_auc": 0.6884837475836854,
+ "eval_precision": 0.7102177554438861,
+ "eval_precision_macro": 0.8257076908850431,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8157191031066195,
+ "eval_runtime": 0.1788,
+ "eval_samples_per_second": 911.642,
+ "eval_steps_per_second": 5.593,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7644
+ },
+ {
+ "epoch": 295.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9314417858263554,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696337658071933,
+ "eval_f1_macro": 0.8203618088536944,
+ "eval_loss": 0.26788315176963806,
+ "eval_pr_auc": 0.6885944465147925,
+ "eval_precision": 0.7095046854082999,
+ "eval_precision_macro": 0.8253458678840061,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156285616688215,
+ "eval_runtime": 0.2717,
+ "eval_samples_per_second": 599.832,
+ "eval_steps_per_second": 3.68,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7670
+ },
+ {
+ "epoch": 296.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9314500389854711,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696636587366694,
+ "eval_f1_macro": 0.8205062543343502,
+ "eval_loss": 0.2678987681865692,
+ "eval_pr_auc": 0.6885305487751676,
+ "eval_precision": 0.7090848363393454,
+ "eval_precision_macro": 0.8252153220919469,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.816021735161691,
+ "eval_runtime": 0.2551,
+ "eval_samples_per_second": 639.002,
+ "eval_steps_per_second": 3.92,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7696
+ },
+ {
+ "epoch": 297.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.931550575699698,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965959546127282,
+ "eval_f1_macro": 0.8205577878611678,
+ "eval_loss": 0.2679198086261749,
+ "eval_pr_auc": 0.6889351423284887,
+ "eval_precision": 0.710738255033557,
+ "eval_precision_macro": 0.8259168264621285,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.8154871679239726,
+ "eval_runtime": 0.2409,
+ "eval_samples_per_second": 676.584,
+ "eval_steps_per_second": 4.151,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7722
+ },
+ {
+ "epoch": 298.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9316071409836368,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6964256300444738,
+ "eval_f1_macro": 0.82051102635547,
+ "eval_loss": 0.2680239677429199,
+ "eval_pr_auc": 0.6888956424322248,
+ "eval_precision": 0.7117845117845117,
+ "eval_precision_macro": 0.8263378182350514,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8150232975586785,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.263,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7748
+ },
+ {
+ "epoch": 299.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9315966104197652,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6972235912600624,
+ "eval_f1_macro": 0.8208966763783243,
+ "eval_loss": 0.26803261041641235,
+ "eval_pr_auc": 0.6887732742755047,
+ "eval_precision": 0.7106496985934361,
+ "eval_precision_macro": 0.8259818448607991,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8160717602063305,
+ "eval_runtime": 0.2484,
+ "eval_samples_per_second": 656.12,
+ "eval_steps_per_second": 4.025,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7774
+ },
+ {
+ "epoch": 300.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9315960653998236,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966218432272876,
+ "eval_f1_macro": 0.8204813289787078,
+ "eval_loss": 0.26792290806770325,
+ "eval_pr_auc": 0.6890570542847262,
+ "eval_precision": 0.7087087087087087,
+ "eval_precision_macro": 0.8250519729735134,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8161226125133815,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.694,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7800
+ },
+ {
+ "epoch": 301.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9316357350655727,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966957093539372,
+ "eval_f1_macro": 0.8206059967994839,
+ "eval_loss": 0.26809555292129517,
+ "eval_pr_auc": 0.6889378655811479,
+ "eval_precision": 0.710596914822267,
+ "eval_precision_macro": 0.8258725914156881,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815618225754929,
+ "eval_runtime": 0.2178,
+ "eval_samples_per_second": 748.303,
+ "eval_steps_per_second": 4.591,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7826
+ },
+ {
+ "epoch": 302.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9317000084886855,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965811965811965,
+ "eval_f1_macro": 0.8205328694321837,
+ "eval_loss": 0.26798245310783386,
+ "eval_pr_auc": 0.6894273728032447,
+ "eval_precision": 0.7103586992960107,
+ "eval_precision_macro": 0.8257517200405735,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815588045275663,
+ "eval_runtime": 0.2561,
+ "eval_samples_per_second": 636.477,
+ "eval_steps_per_second": 3.905,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7852
+ },
+ {
+ "epoch": 303.0,
+ "eval_accuracy": 0.9060402684563759,
+ "eval_auc": 0.9316820812256066,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966513460275772,
+ "eval_f1_macro": 0.8205311837826491,
+ "eval_loss": 0.26780617237091064,
+ "eval_pr_auc": 0.6897785082602487,
+ "eval_precision": 0.7094617184887997,
+ "eval_precision_macro": 0.8253790573615671,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159208578100005,
+ "eval_runtime": 0.1902,
+ "eval_samples_per_second": 856.826,
+ "eval_steps_per_second": 5.257,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7878
+ },
+ {
+ "epoch": 304.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9317363691047894,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.26783081889152527,
+ "eval_pr_auc": 0.690108350201664,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.745,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7904
+ },
+ {
+ "epoch": 305.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318277475374976,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6962670613385956,
+ "eval_f1_macro": 0.8203632705580364,
+ "eval_loss": 0.267810195684433,
+ "eval_pr_auc": 0.6906198234983021,
+ "eval_precision": 0.7104026845637584,
+ "eval_precision_macro": 0.82571907957814,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.8152957491344841,
+ "eval_runtime": 0.2457,
+ "eval_samples_per_second": 663.513,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7930
+ },
+ {
+ "epoch": 306.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318495094051658,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965664530967636,
+ "eval_f1_macro": 0.8205079551116469,
+ "eval_loss": 0.26770758628845215,
+ "eval_pr_auc": 0.6907229577841941,
+ "eval_precision": 0.709979906229069,
+ "eval_precision_macro": 0.8255870038278783,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156889226273535,
+ "eval_runtime": 0.2472,
+ "eval_samples_per_second": 659.459,
+ "eval_steps_per_second": 4.046,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7956
+ },
+ {
+ "epoch": 307.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9318431735483448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6973381531383503,
+ "eval_f1_macro": 0.8209698284488745,
+ "eval_loss": 0.26784345507621765,
+ "eval_pr_auc": 0.6903028236900399,
+ "eval_precision": 0.7108877721943049,
+ "eval_precision_macro": 0.8261026405178202,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8161019406855965,
+ "eval_runtime": 0.2339,
+ "eval_samples_per_second": 696.845,
+ "eval_steps_per_second": 4.275,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7982
+ },
+ {
+ "epoch": 307.6923076923077,
+ "grad_norm": 18753.48046875,
+ "learning_rate": 3.8700127731844033e-07,
+ "loss": 0.2071,
+ "step": 8000
+ },
+ {
+ "epoch": 308.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318915635331595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966661192314009,
+ "eval_f1_macro": 0.8205561173351938,
+ "eval_loss": 0.2679731547832489,
+ "eval_pr_auc": 0.6902808443840289,
+ "eval_precision": 0.7098393574297188,
+ "eval_precision_macro": 0.8255431799139001,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.81581998045831,
+ "eval_runtime": 0.2227,
+ "eval_samples_per_second": 731.952,
+ "eval_steps_per_second": 4.491,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8008
+ },
+ {
+ "epoch": 309.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9319105808361218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975217462662071,
+ "eval_f1_macro": 0.821040751603759,
+ "eval_loss": 0.2678290605545044,
+ "eval_pr_auc": 0.6904266123808676,
+ "eval_precision": 0.7102272727272727,
+ "eval_precision_macro": 0.8258500239865676,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8164649336992,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.086,
+ "eval_steps_per_second": 4.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8034
+ },
+ {
+ "epoch": 310.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9319563819762139,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6977201902575036,
+ "eval_f1_macro": 0.821136637744354,
+ "eval_loss": 0.2677942216396332,
+ "eval_pr_auc": 0.6905770539125178,
+ "eval_precision": 0.7099465954606141,
+ "eval_precision_macro": 0.8257626451391362,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6859077716865527,
+ "eval_recall_macro": 0.816727049361113,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 672.991,
+ "eval_steps_per_second": 4.129,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8060
+ },
+ {
+ "epoch": 311.0,
+ "eval_accuracy": 0.9064470205409803,
+ "eval_auc": 0.9320261250637406,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975673898750822,
+ "eval_f1_macro": 0.8211161862162611,
+ "eval_loss": 0.2678627669811249,
+ "eval_pr_auc": 0.6908896136415948,
+ "eval_precision": 0.7113643982567884,
+ "eval_precision_macro": 0.8263444706297427,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8161623016441286,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.505,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8086
+ },
+ {
+ "epoch": 312.0,
+ "eval_accuracy": 0.9060402684563759,
+ "eval_auc": 0.9320056478859348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696551724137931,
+ "eval_f1_macro": 0.8204830448879512,
+ "eval_loss": 0.2678248882293701,
+ "eval_pr_auc": 0.6908301522653787,
+ "eval_precision": 0.7096018735362998,
+ "eval_precision_macro": 0.8254226766806146,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815789799979044,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.086,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8112
+ },
+ {
+ "epoch": 313.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9319988935316585,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965370096832431,
+ "eval_f1_macro": 0.8204581387495119,
+ "eval_loss": 0.2678254544734955,
+ "eval_pr_auc": 0.6908227024589884,
+ "eval_precision": 0.7092245989304813,
+ "eval_precision_macro": 0.8252587374599636,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8158906773307344,
+ "eval_runtime": 0.2589,
+ "eval_samples_per_second": 629.545,
+ "eval_steps_per_second": 3.862,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8138
+ },
+ {
+ "epoch": 314.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9320198184044164,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971634694212165,
+ "eval_f1_macro": 0.8207964351950081,
+ "eval_loss": 0.2678711414337158,
+ "eval_pr_auc": 0.6909501280556708,
+ "eval_precision": 0.7091394262841895,
+ "eval_precision_macro": 0.8253255619723288,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6855852950661077,
+ "eval_recall_macro": 0.8164752696130925,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.201,
+ "eval_steps_per_second": 3.768,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8164
+ },
+ {
+ "epoch": 315.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9320753325784678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.2678229808807373,
+ "eval_pr_auc": 0.6914105766155315,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.375,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8190
+ },
+ {
+ "epoch": 316.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9321298151076297,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967953985209532,
+ "eval_f1_macro": 0.8206541727499956,
+ "eval_loss": 0.26796844601631165,
+ "eval_pr_auc": 0.6912288183485439,
+ "eval_precision": 0.710455764075067,
+ "eval_precision_macro": 0.8258284574391159,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8157492835858855,
+ "eval_runtime": 0.2237,
+ "eval_samples_per_second": 728.567,
+ "eval_steps_per_second": 4.47,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8216
+ },
+ {
+ "epoch": 317.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9321722877330785,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6956235603817045,
+ "eval_f1_macro": 0.8199989231339035,
+ "eval_loss": 0.2681267261505127,
+ "eval_pr_auc": 0.6912998271961284,
+ "eval_precision": 0.7101108498488411,
+ "eval_precision_macro": 0.8254885924997606,
+ "eval_pred_class_0": 16691,
+ "eval_pred_class_1": 2977,
+ "eval_predicted_binding_ratio": 0.15136261948342486,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8148120342038165,
+ "eval_runtime": 0.1877,
+ "eval_samples_per_second": 868.412,
+ "eval_steps_per_second": 5.328,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8242
+ },
+ {
+ "epoch": 318.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9321751880177678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965664530967636,
+ "eval_f1_macro": 0.8205079551116469,
+ "eval_loss": 0.26805615425109863,
+ "eval_pr_auc": 0.6913470747613989,
+ "eval_precision": 0.709979906229069,
+ "eval_precision_macro": 0.8255870038278783,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156889226273535,
+ "eval_runtime": 0.2426,
+ "eval_samples_per_second": 671.932,
+ "eval_steps_per_second": 4.122,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8268
+ },
+ {
+ "epoch": 319.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9321843949617812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6961380443714051,
+ "eval_f1_macro": 0.8202653471082613,
+ "eval_loss": 0.267932653427124,
+ "eval_pr_auc": 0.6916487257323465,
+ "eval_precision": 0.7097855227882037,
+ "eval_precision_macro": 0.8254333991308556,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.8153664460069084,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.38,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8294
+ },
+ {
+ "epoch": 320.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.932188093311385,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6964373666064686,
+ "eval_f1_macro": 0.8204099902666875,
+ "eval_loss": 0.2679634094238281,
+ "eval_pr_auc": 0.6915655068510385,
+ "eval_precision": 0.7093645484949833,
+ "eval_precision_macro": 0.8253022526621696,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815759619499778,
+ "eval_runtime": 0.2483,
+ "eval_samples_per_second": 656.448,
+ "eval_steps_per_second": 4.027,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8320
+ },
+ {
+ "epoch": 321.0,
+ "eval_accuracy": 0.9058877364246491,
+ "eval_auc": 0.9321906237611137,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965076242006887,
+ "eval_f1_macro": 0.8204083386821658,
+ "eval_loss": 0.26790735125541687,
+ "eval_pr_auc": 0.6917736045731879,
+ "eval_precision": 0.7084723148765844,
+ "eval_precision_macro": 0.8249320182661266,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8160924320341154,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.581,
+ "eval_steps_per_second": 3.856,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8346
+ },
+ {
+ "epoch": 322.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.932275423024527,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6955950032873109,
+ "eval_f1_macro": 0.8199495526481063,
+ "eval_loss": 0.2680682837963104,
+ "eval_pr_auc": 0.6919499023976591,
+ "eval_precision": 0.709353000335233,
+ "eval_precision_macro": 0.8251589694514043,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8150137889071974,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.703,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8372
+ },
+ {
+ "epoch": 323.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9322442887603632,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965370096832431,
+ "eval_f1_macro": 0.8204581387495119,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6918309607756195,
+ "eval_precision": 0.7092245989304813,
+ "eval_precision_macro": 0.8252587374599636,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8158906773307344,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.368,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8398
+ },
+ {
+ "epoch": 324.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9322356073712934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6960945191992124,
+ "eval_f1_macro": 0.8201909332654507,
+ "eval_loss": 0.2680003046989441,
+ "eval_pr_auc": 0.6918708088537314,
+ "eval_precision": 0.7086535248914133,
+ "eval_precision_macro": 0.8249414550993799,
+ "eval_pred_class_0": 16675,
+ "eval_pred_class_1": 2993,
+ "eval_predicted_binding_ratio": 0.15217612365263372,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.81566907806198,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.473,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8424
+ },
+ {
+ "epoch": 325.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9322670238779269,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6961235216819974,
+ "eval_f1_macro": 0.8202405385118361,
+ "eval_loss": 0.26794886589050293,
+ "eval_pr_auc": 0.6921960622354616,
+ "eval_precision": 0.7094074322062269,
+ "eval_precision_macro": 0.8252690299332196,
+ "eval_pred_class_0": 16681,
+ "eval_pred_class_1": 2987,
+ "eval_predicted_binding_ratio": 0.1518710595891804,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815467323358599,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.5,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8450
+ },
+ {
+ "epoch": 326.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9322930096501425,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.2680268883705139,
+ "eval_pr_auc": 0.6921477872574622,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.1733,
+ "eval_samples_per_second": 940.367,
+ "eval_steps_per_second": 5.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8476
+ },
+ {
+ "epoch": 326.9230769230769,
+ "grad_norm": 17241.076171875,
+ "learning_rate": 3.2282309449959705e-07,
+ "loss": 0.2047,
+ "step": 8500
+ },
+ {
+ "epoch": 327.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9323445529646195,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967657199146281,
+ "eval_f1_macro": 0.820604246632002,
+ "eval_loss": 0.26804205775260925,
+ "eval_pr_auc": 0.6924439463998024,
+ "eval_precision": 0.7096989966555184,
+ "eval_precision_macro": 0.8254994563562998,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159510382892665,
+ "eval_runtime": 0.1771,
+ "eval_samples_per_second": 920.47,
+ "eval_steps_per_second": 5.647,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8502
+ },
+ {
+ "epoch": 328.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9323743441439272,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967657199146281,
+ "eval_f1_macro": 0.820604246632002,
+ "eval_loss": 0.26802849769592285,
+ "eval_pr_auc": 0.6925977669253861,
+ "eval_precision": 0.7096989966555184,
+ "eval_precision_macro": 0.8254994563562998,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159510382892665,
+ "eval_runtime": 0.1678,
+ "eval_samples_per_second": 971.421,
+ "eval_steps_per_second": 5.96,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8528
+ },
+ {
+ "epoch": 329.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9323637649175607,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983295119554537,
+ "eval_f1_macro": 0.8214488366277419,
+ "eval_loss": 0.2680566608905792,
+ "eval_pr_auc": 0.6924500045026715,
+ "eval_precision": 0.7094841930116472,
+ "eval_precision_macro": 0.825665699698526,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8174125189951615,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.54,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8554
+ },
+ {
+ "epoch": 330.0,
+ "eval_accuracy": 0.906243644498678,
+ "eval_auc": 0.9324242037360844,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6974072858549393,
+ "eval_f1_macro": 0.8209676462967013,
+ "eval_loss": 0.26804018020629883,
+ "eval_pr_auc": 0.6928556723686659,
+ "eval_precision": 0.7099899766120948,
+ "eval_precision_macro": 0.8257296209897056,
+ "eval_pred_class_0": 16675,
+ "eval_pred_class_1": 2993,
+ "eval_predicted_binding_ratio": 0.15217612365263372,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8164347532199341,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.585,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8580
+ },
+ {
+ "epoch": 331.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9323838917254041,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6981162981162982,
+ "eval_f1_macro": 0.8213280175544326,
+ "eval_loss": 0.2678423821926117,
+ "eval_pr_auc": 0.6929480254197629,
+ "eval_precision": 0.7093874833555259,
+ "eval_precision_macro": 0.8255890849326837,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.817251280684939,
+ "eval_runtime": 0.2474,
+ "eval_samples_per_second": 658.814,
+ "eval_steps_per_second": 4.042,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8606
+ },
+ {
+ "epoch": 332.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9323530591687079,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993784756297023,
+ "eval_f1_macro": 0.8220268454242666,
+ "eval_loss": 0.26779934763908386,
+ "eval_pr_auc": 0.6928104051729911,
+ "eval_precision": 0.7095917690009956,
+ "eval_precision_macro": 0.8258856473344816,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183195878979646,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.815,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8632
+ },
+ {
+ "epoch": 333.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.932432009200248,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983450761920367,
+ "eval_f1_macro": 0.8214741659959972,
+ "eval_loss": 0.2679860591888428,
+ "eval_pr_auc": 0.6929996618872077,
+ "eval_precision": 0.7098600932711525,
+ "eval_precision_macro": 0.8258288825890143,
+ "eval_pred_class_0": 16666,
+ "eval_pred_class_1": 3002,
+ "eval_predicted_binding_ratio": 0.1526337197478137,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.817311641643471,
+ "eval_runtime": 0.1802,
+ "eval_samples_per_second": 904.69,
+ "eval_steps_per_second": 5.55,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8658
+ },
+ {
+ "epoch": 334.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9325279911049631,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975521603417119,
+ "eval_f1_macro": 0.8210910370116632,
+ "eval_loss": 0.26808273792266846,
+ "eval_pr_auc": 0.6933573725062909,
+ "eval_precision": 0.7109845947756196,
+ "eval_precision_macro": 0.8261792653772595,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.816263178995819,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.721,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8684
+ },
+ {
+ "epoch": 335.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.932545276023111,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6976362442547603,
+ "eval_f1_macro": 0.8211138747443938,
+ "eval_loss": 0.26816996932029724,
+ "eval_pr_auc": 0.6932853419412803,
+ "eval_precision": 0.710464727515881,
+ "eval_precision_macro": 0.8259705061096825,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.816495114178466,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.649,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8710
+ },
+ {
+ "epoch": 336.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9325305604846879,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982772764561116,
+ "eval_f1_macro": 0.821477015533191,
+ "eval_loss": 0.2680180072784424,
+ "eval_pr_auc": 0.6935243427052671,
+ "eval_precision": 0.7107548430193721,
+ "eval_precision_macro": 0.8262002594609874,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169788291091336,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.515,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8736
+ },
+ {
+ "epoch": 337.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.932509878924404,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.698032786885246,
+ "eval_f1_macro": 0.8213054775682699,
+ "eval_loss": 0.2678954601287842,
+ "eval_pr_auc": 0.6935428752602653,
+ "eval_precision": 0.7099033011003668,
+ "eval_precision_macro": 0.8257957323787274,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.817019345502292,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.491,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8762
+ },
+ {
+ "epoch": 338.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9325608674864403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989176779271893,
+ "eval_f1_macro": 0.8218398486513014,
+ "eval_loss": 0.26806220412254333,
+ "eval_pr_auc": 0.6935170525264562,
+ "eval_precision": 0.7110443777110443,
+ "eval_precision_macro": 0.8264297528888735,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.8174625440398011,
+ "eval_runtime": 0.1767,
+ "eval_samples_per_second": 922.3,
+ "eval_steps_per_second": 5.658,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8788
+ },
+ {
+ "epoch": 339.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9325807801793065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990163934426229,
+ "eval_f1_macro": 0.8218875444165816,
+ "eval_loss": 0.26803991198539734,
+ "eval_pr_auc": 0.6936355510877774,
+ "eval_precision": 0.7109036345448483,
+ "eval_precision_macro": 0.8263858865027319,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8175936018707576,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.235,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8814
+ },
+ {
+ "epoch": 340.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9326051308916972,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983762506150566,
+ "eval_f1_macro": 0.8215248382050282,
+ "eval_loss": 0.26792144775390625,
+ "eval_pr_auc": 0.6939264625162457,
+ "eval_precision": 0.7106141522029372,
+ "eval_precision_macro": 0.8261564043164398,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.8171098869400901,
+ "eval_runtime": 0.2744,
+ "eval_samples_per_second": 593.925,
+ "eval_steps_per_second": 3.644,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8840
+ },
+ {
+ "epoch": 341.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9325905710732574,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6980482204362801,
+ "eval_f1_macro": 0.821330737974691,
+ "eval_loss": 0.26793381571769714,
+ "eval_pr_auc": 0.6938155556406347,
+ "eval_precision": 0.7102803738317757,
+ "eval_precision_macro": 0.825959524727788,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169184681506015,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.704,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8866
+ },
+ {
+ "epoch": 342.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9325911160931989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997542997542997,
+ "eval_f1_macro": 0.8222974802915219,
+ "eval_loss": 0.26800957322120667,
+ "eval_pr_auc": 0.6936597261010234,
+ "eval_precision": 0.711051930758988,
+ "eval_precision_macro": 0.8265713326382553,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8182083746323816,
+ "eval_runtime": 0.1766,
+ "eval_samples_per_second": 923.118,
+ "eval_steps_per_second": 5.663,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8892
+ },
+ {
+ "epoch": 343.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9326288587241547,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982772764561116,
+ "eval_f1_macro": 0.821477015533191,
+ "eval_loss": 0.2680290937423706,
+ "eval_pr_auc": 0.6938544134850919,
+ "eval_precision": 0.7107548430193721,
+ "eval_precision_macro": 0.8262002594609874,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169788291091336,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.353,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8918
+ },
+ {
+ "epoch": 344.0,
+ "eval_accuracy": 0.9064470205409803,
+ "eval_auc": 0.9326493359019604,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982617251557888,
+ "eval_f1_macro": 0.8214517001734176,
+ "eval_loss": 0.2680736482143402,
+ "eval_pr_auc": 0.6938105747317047,
+ "eval_precision": 0.710377043710377,
+ "eval_precision_macro": 0.8260361014844849,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.8170797064608241,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.605,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8944
+ },
+ {
+ "epoch": 345.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9326923535473509,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6977507798391068,
+ "eval_f1_macro": 0.8211870157279457,
+ "eval_loss": 0.2681424021720886,
+ "eval_pr_auc": 0.6938928037843046,
+ "eval_precision": 0.7107023411371237,
+ "eval_precision_macro": 0.8260910674386901,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.816525294657732,
+ "eval_runtime": 0.2453,
+ "eval_samples_per_second": 664.523,
+ "eval_steps_per_second": 4.077,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8970
+ },
+ {
+ "epoch": 346.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9327347288478101,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6979799638692724,
+ "eval_f1_macro": 0.821333351261192,
+ "eval_loss": 0.2682046592235565,
+ "eval_pr_auc": 0.6939995353512864,
+ "eval_precision": 0.7111780455153949,
+ "eval_precision_macro": 0.8263324280334768,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8165856556162641,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.642,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8996
+ },
+ {
+ "epoch": 346.15384615384613,
+ "grad_norm": 18666.783203125,
+ "learning_rate": 2.618336781094791e-07,
+ "loss": 0.2031,
+ "step": 9000
+ },
+ {
+ "epoch": 347.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9326942611171465,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983139630054018,
+ "eval_f1_macro": 0.8214235117341392,
+ "eval_loss": 0.2681069076061249,
+ "eval_pr_auc": 0.6938781737586003,
+ "eval_precision": 0.7091090425531915,
+ "eval_precision_macro": 0.8255029006283365,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6878426314092229,
+ "eval_recall_macro": 0.817513396346852,
+ "eval_runtime": 0.1933,
+ "eval_samples_per_second": 843.456,
+ "eval_steps_per_second": 5.175,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9022
+ },
+ {
+ "epoch": 348.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9326940859321652,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.26795056462287903,
+ "eval_pr_auc": 0.6940722215736992,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.287,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9048
+ },
+ {
+ "epoch": 349.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9327572303853989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991310050828005,
+ "eval_f1_macro": 0.8219607247335354,
+ "eval_loss": 0.267974317073822,
+ "eval_pr_auc": 0.6943572217621294,
+ "eval_precision": 0.7111407605070047,
+ "eval_precision_macro": 0.8265061930909349,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8176237823500236,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.819,
+ "eval_steps_per_second": 3.79,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9074
+ },
+ {
+ "epoch": 350.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9327859996523161,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6985064828491712,
+ "eval_f1_macro": 0.8216233644580062,
+ "eval_loss": 0.2680947780609131,
+ "eval_pr_auc": 0.6943020020881897,
+ "eval_precision": 0.7112299465240641,
+ "eval_precision_macro": 0.8264413105131714,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8170391900676656,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.86,
+ "eval_steps_per_second": 4.153,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9100
+ },
+ {
+ "epoch": 351.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9328183504788495,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6986211424819435,
+ "eval_f1_macro": 0.8216965657061384,
+ "eval_loss": 0.2681223750114441,
+ "eval_pr_auc": 0.694450730809475,
+ "eval_precision": 0.7114677365429622,
+ "eval_precision_macro": 0.8265619548577976,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8170693705469316,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.919,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9126
+ },
+ {
+ "epoch": 352.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9328099415997506,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991310050828005,
+ "eval_f1_macro": 0.8219607247335354,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6945293388795055,
+ "eval_precision": 0.7111407605070047,
+ "eval_precision_macro": 0.8265061930909349,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8176237823500236,
+ "eval_runtime": 0.1729,
+ "eval_samples_per_second": 942.612,
+ "eval_steps_per_second": 5.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9152
+ },
+ {
+ "epoch": 353.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9328013186056745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000819000819001,
+ "eval_f1_macro": 0.8224913728389398,
+ "eval_loss": 0.26793336868286133,
+ "eval_pr_auc": 0.6945633927009858,
+ "eval_precision": 0.7113848202396804,
+ "eval_precision_macro": 0.8267677821793697,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8183997934218701,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.431,
+ "eval_steps_per_second": 3.917,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9178
+ },
+ {
+ "epoch": 354.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.932783060437631,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26802781224250793,
+ "eval_pr_auc": 0.694307607889245,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.039,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9204
+ },
+ {
+ "epoch": 355.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9327856687473517,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997875469848014,
+ "eval_f1_macro": 0.8222422697443199,
+ "eval_loss": 0.2679577171802521,
+ "eval_pr_auc": 0.6944152708065687,
+ "eval_precision": 0.7094102054340623,
+ "eval_precision_macro": 0.8258762738882024,
+ "eval_pred_class_0": 16650,
+ "eval_pred_class_1": 3018,
+ "eval_predicted_binding_ratio": 0.15344722391702256,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8187429418701,
+ "eval_runtime": 0.1868,
+ "eval_samples_per_second": 872.379,
+ "eval_steps_per_second": 5.352,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9230
+ },
+ {
+ "epoch": 356.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9328543023299973,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26801130175590515,
+ "eval_pr_auc": 0.6946173571124205,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1679,
+ "eval_samples_per_second": 971.014,
+ "eval_steps_per_second": 5.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9256
+ },
+ {
+ "epoch": 357.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9328912663610364,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002947920078612,
+ "eval_f1_macro": 0.8226120363891247,
+ "eval_loss": 0.26803824305534363,
+ "eval_pr_auc": 0.6948368351435844,
+ "eval_precision": 0.7114808652246256,
+ "eval_precision_macro": 0.8268440754137292,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8185610317320926,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.12,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9282
+ },
+ {
+ "epoch": 358.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9328766286826047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003929273084479,
+ "eval_f1_macro": 0.8226594466805872,
+ "eval_loss": 0.2680445909500122,
+ "eval_pr_auc": 0.6947543185413181,
+ "eval_precision": 0.711340206185567,
+ "eval_precision_macro": 0.8268002873554328,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186920895630492,
+ "eval_runtime": 0.2229,
+ "eval_samples_per_second": 731.38,
+ "eval_steps_per_second": 4.487,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9308
+ },
+ {
+ "epoch": 359.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9328791980623294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002617801047121,
+ "eval_f1_macro": 0.8225604590386311,
+ "eval_loss": 0.2679673135280609,
+ "eval_pr_auc": 0.694877850403851,
+ "eval_precision": 0.7107273331119229,
+ "eval_precision_macro": 0.8265169354519211,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187627864354736,
+ "eval_runtime": 0.1766,
+ "eval_samples_per_second": 922.941,
+ "eval_steps_per_second": 5.662,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9334
+ },
+ {
+ "epoch": 360.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9328811640271188,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2680214047431946,
+ "eval_pr_auc": 0.6947522638420174,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.2235,
+ "eval_samples_per_second": 729.222,
+ "eval_steps_per_second": 4.474,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9360
+ },
+ {
+ "epoch": 361.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.932900882070006,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000163478829492,
+ "eval_f1_macro": 0.8223884382480462,
+ "eval_loss": 0.26796379685401917,
+ "eval_pr_auc": 0.6949176089109205,
+ "eval_precision": 0.7098806366047745,
+ "eval_precision_macro": 0.8261149519800236,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188033028286321,
+ "eval_runtime": 0.1694,
+ "eval_samples_per_second": 962.09,
+ "eval_steps_per_second": 5.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9386
+ },
+ {
+ "epoch": 362.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9329447561753046,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005723630417008,
+ "eval_f1_macro": 0.8227283114988764,
+ "eval_loss": 0.26795387268066406,
+ "eval_pr_auc": 0.6950891938157696,
+ "eval_precision": 0.7106834771068348,
+ "eval_precision_macro": 0.8265498567232265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190550825766527,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.968,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9412
+ },
+ {
+ "epoch": 363.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9329731556072616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001636661211129,
+ "eval_f1_macro": 0.8225130616165066,
+ "eval_loss": 0.26798126101493835,
+ "eval_pr_auc": 0.6952376733836106,
+ "eval_precision": 0.7108673978065803,
+ "eval_precision_macro": 0.8265604171937038,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818631728604517,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.549,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9438
+ },
+ {
+ "epoch": 364.0,
+ "eval_accuracy": 0.9070063046573114,
+ "eval_auc": 0.9329669754704237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008994276369583,
+ "eval_f1_macro": 0.8229219452383643,
+ "eval_loss": 0.26792433857917786,
+ "eval_pr_auc": 0.6952423959524336,
+ "eval_precision": 0.7110152621101526,
+ "eval_precision_macro": 0.8267457720422265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8192465013661412,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.085,
+ "eval_steps_per_second": 3.847,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9464
+ },
+ {
+ "epoch": 365.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9329833552661686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003598298985934,
+ "eval_f1_macro": 0.8226078241660808,
+ "eval_loss": 0.2679993808269501,
+ "eval_pr_auc": 0.6952272078548911,
+ "eval_precision": 0.7105874543644208,
+ "eval_precision_macro": 0.8264735530603251,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188938442664302,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.317,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9490
+ },
+ {
+ "epoch": 365.38461538461536,
+ "grad_norm": 18768.416015625,
+ "learning_rate": 2.0513069380006943e-07,
+ "loss": 0.2014,
+ "step": 9500
+ },
+ {
+ "epoch": 366.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330138179879044,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003763704794632,
+ "eval_f1_macro": 0.822633632944773,
+ "eval_loss": 0.26798829436302185,
+ "eval_pr_auc": 0.6953770669982303,
+ "eval_precision": 0.7109634551495017,
+ "eval_precision_macro": 0.8266367281750631,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187929669147396,
+ "eval_runtime": 0.198,
+ "eval_samples_per_second": 823.426,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9516
+ },
+ {
+ "epoch": 367.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9330281442263691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005890052356021,
+ "eval_f1_macro": 0.8227541703278901,
+ "eval_loss": 0.2679848372936249,
+ "eval_pr_auc": 0.6954512195878219,
+ "eval_precision": 0.7110594486881435,
+ "eval_precision_macro": 0.8267130106501293,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189542052249621,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.184,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9542
+ },
+ {
+ "epoch": 368.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330502564639999,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2679852545261383,
+ "eval_pr_auc": 0.6955032656447977,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.651,
+ "eval_steps_per_second": 3.887,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9568
+ },
+ {
+ "epoch": 369.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330669768972081,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2680181562900543,
+ "eval_pr_auc": 0.6955153529738297,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2378,
+ "eval_samples_per_second": 685.421,
+ "eval_steps_per_second": 4.205,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9594
+ },
+ {
+ "epoch": 370.0,
+ "eval_accuracy": 0.9070063046573114,
+ "eval_auc": 0.9330907631246593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008015704236872,
+ "eval_f1_macro": 0.8228746737830142,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6956635287744017,
+ "eval_precision": 0.7111553784860558,
+ "eval_precision_macro": 0.8267892646512892,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8191154435351846,
+ "eval_runtime": 0.1746,
+ "eval_samples_per_second": 933.529,
+ "eval_steps_per_second": 5.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9620
+ },
+ {
+ "epoch": 371.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9331252940309591,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999672453324599,
+ "eval_f1_macro": 0.8224181697622275,
+ "eval_loss": 0.26801252365112305,
+ "eval_pr_auc": 0.6958269523241747,
+ "eval_precision": 0.7111480865224625,
+ "eval_precision_macro": 0.8266476794611952,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8183696129426041,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.791,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9646
+ },
+ {
+ "epoch": 372.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9331262672808548,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005723630417008,
+ "eval_f1_macro": 0.8227283114988764,
+ "eval_loss": 0.26803725957870483,
+ "eval_pr_auc": 0.6957878915651574,
+ "eval_precision": 0.7106834771068348,
+ "eval_precision_macro": 0.8265498567232265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190550825766527,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.266,
+ "eval_steps_per_second": 3.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9672
+ },
+ {
+ "epoch": 373.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9331573528825239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26807889342308044,
+ "eval_pr_auc": 0.6959044832644976,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2659,
+ "eval_samples_per_second": 613.113,
+ "eval_steps_per_second": 3.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9698
+ },
+ {
+ "epoch": 374.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9331759224905339,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6996235063021771,
+ "eval_f1_macro": 0.8221986674075668,
+ "eval_loss": 0.2681734561920166,
+ "eval_pr_auc": 0.695850524773773,
+ "eval_precision": 0.710438829787234,
+ "eval_precision_macro": 0.8262878422645654,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.818279071504806,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.284,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9724
+ },
+ {
+ "epoch": 375.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933143698186487,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2681812345981598,
+ "eval_pr_auc": 0.6956723886102156,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.1783,
+ "eval_samples_per_second": 914.221,
+ "eval_steps_per_second": 5.609,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9750
+ },
+ {
+ "epoch": 376.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9331405935193198,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002123835974514,
+ "eval_f1_macro": 0.8224831299290885,
+ "eval_loss": 0.2681320309638977,
+ "eval_pr_auc": 0.6957066245846253,
+ "eval_precision": 0.7096026490066225,
+ "eval_precision_macro": 0.8260290996114323,
+ "eval_pred_class_0": 16648,
+ "eval_pred_class_1": 3020,
+ "eval_predicted_binding_ratio": 0.1535489119381737,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.819065418490545,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.75,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9776
+ },
+ {
+ "epoch": 377.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9331510656881976,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7010443864229765,
+ "eval_f1_macro": 0.8229384784439624,
+ "eval_loss": 0.2682173550128937,
+ "eval_pr_auc": 0.6955658216411763,
+ "eval_precision": 0.709613478691774,
+ "eval_precision_macro": 0.8261726428372638,
+ "eval_pred_class_0": 16641,
+ "eval_pred_class_1": 3027,
+ "eval_predicted_binding_ratio": 0.15390482001220257,
+ "eval_recall": 0.6926797807158981,
+ "eval_recall_macro": 0.8198112490831255,
+ "eval_runtime": 0.2446,
+ "eval_samples_per_second": 666.285,
+ "eval_steps_per_second": 4.088,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9802
+ },
+ {
+ "epoch": 378.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9331855381995037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994767822105952,
+ "eval_f1_macro": 0.8220743333087894,
+ "eval_loss": 0.2681441009044647,
+ "eval_pr_auc": 0.6959256270686769,
+ "eval_precision": 0.709452736318408,
+ "eval_precision_macro": 0.8258426835378145,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8184506457289211,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.04,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9828
+ },
+ {
+ "epoch": 379.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9331895674540718,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003267973856209,
+ "eval_f1_macro": 0.8225562214288413,
+ "eval_loss": 0.26809969544410706,
+ "eval_pr_auc": 0.6959934558189067,
+ "eval_precision": 0.7098376946008612,
+ "eval_precision_macro": 0.8261483505739005,
+ "eval_pred_class_0": 16649,
+ "eval_pred_class_1": 3019,
+ "eval_predicted_binding_ratio": 0.15349806792759813,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8190955989698111,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.005,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9854
+ },
+ {
+ "epoch": 380.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332175581210724,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994928840176673,
+ "eval_f1_macro": 0.8220999320609059,
+ "eval_loss": 0.26810789108276367,
+ "eval_pr_auc": 0.6960299382483204,
+ "eval_precision": 0.7098273572377158,
+ "eval_precision_macro": 0.8260051771779358,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183497683772306,
+ "eval_runtime": 0.1782,
+ "eval_samples_per_second": 914.547,
+ "eval_steps_per_second": 5.611,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9880
+ },
+ {
+ "epoch": 381.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332336167443518,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.2680502235889435,
+ "eval_pr_auc": 0.6962912618518755,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.169,
+ "eval_steps_per_second": 3.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9906
+ },
+ {
+ "epoch": 382.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332394951737218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26803064346313477,
+ "eval_pr_auc": 0.696271279785013,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.1775,
+ "eval_samples_per_second": 918.497,
+ "eval_steps_per_second": 5.635,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9932
+ },
+ {
+ "epoch": 383.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332396703587031,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2680654227733612,
+ "eval_pr_auc": 0.6962410474404584,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2478,
+ "eval_samples_per_second": 657.85,
+ "eval_steps_per_second": 4.036,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9958
+ },
+ {
+ "epoch": 384.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332586000691747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995090016366612,
+ "eval_f1_macro": 0.8221255355501671,
+ "eval_loss": 0.26804468035697937,
+ "eval_pr_auc": 0.6963737898708651,
+ "eval_precision": 0.7102027251578598,
+ "eval_precision_macro": 0.8261680532566416,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182488910255401,
+ "eval_runtime": 0.1642,
+ "eval_samples_per_second": 992.895,
+ "eval_steps_per_second": 6.091,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9984
+ },
+ {
+ "epoch": 384.61538461538464,
+ "grad_norm": 19506.416015625,
+ "learning_rate": 1.5373466155541264e-07,
+ "loss": 0.1999,
+ "step": 10000
+ },
+ {
+ "epoch": 385.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332885664334637,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990990990990991,
+ "eval_f1_macro": 0.8219096951966862,
+ "eval_loss": 0.26817384362220764,
+ "eval_pr_auc": 0.6963813810367415,
+ "eval_precision": 0.7103861517976032,
+ "eval_precision_macro": 0.8261784335560267,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.8178255370534045,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.846,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10010
+ },
+ {
+ "epoch": 386.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9332890919884074,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995251350908793,
+ "eval_f1_macro": 0.8221511437890823,
+ "eval_loss": 0.2681432366371155,
+ "eval_pr_auc": 0.6963686032935126,
+ "eval_precision": 0.7105788423153693,
+ "eval_precision_macro": 0.8263313128873689,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181480136738496,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 609.003,
+ "eval_steps_per_second": 3.736,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10036
+ },
+ {
+ "epoch": 387.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332816368942063,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.2681044936180115,
+ "eval_pr_auc": 0.6963340299375516,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2503,
+ "eval_samples_per_second": 651.154,
+ "eval_steps_per_second": 3.995,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10062
+ },
+ {
+ "epoch": 388.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332853547088078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26804089546203613,
+ "eval_pr_auc": 0.6964477494759991,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.349,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10088
+ },
+ {
+ "epoch": 389.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333026785569515,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.26805874705314636,
+ "eval_pr_auc": 0.6965416515768459,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.1895,
+ "eval_samples_per_second": 860.093,
+ "eval_steps_per_second": 5.277,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10114
+ },
+ {
+ "epoch": 390.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9332940944928713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26800334453582764,
+ "eval_pr_auc": 0.6965549091166009,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.7,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10140
+ },
+ {
+ "epoch": 391.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332915835081403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699591169255928,
+ "eval_f1_macro": 0.8221474102804127,
+ "eval_loss": 0.26801303029060364,
+ "eval_pr_auc": 0.69649915263674,
+ "eval_precision": 0.7096881220968813,
+ "eval_precision_macro": 0.8259621107662262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8184808262081871,
+ "eval_runtime": 0.2322,
+ "eval_samples_per_second": 702.068,
+ "eval_steps_per_second": 4.307,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10166
+ },
+ {
+ "epoch": 392.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9333394576705105,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680598199367523,
+ "eval_pr_auc": 0.6966844521188784,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.281,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10192
+ },
+ {
+ "epoch": 393.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333311363839021,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26812389492988586,
+ "eval_pr_auc": 0.696579843318821,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.459,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10218
+ },
+ {
+ "epoch": 394.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9333747574442278,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990990990990991,
+ "eval_f1_macro": 0.8219096951966862,
+ "eval_loss": 0.26815417408943176,
+ "eval_pr_auc": 0.6966758102563304,
+ "eval_precision": 0.7103861517976032,
+ "eval_precision_macro": 0.8261784335560267,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.8178255370534045,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.797,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10244
+ },
+ {
+ "epoch": 395.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.93337473797923,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26812419295310974,
+ "eval_pr_auc": 0.6967071460749926,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.2546,
+ "eval_samples_per_second": 640.225,
+ "eval_steps_per_second": 3.928,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10270
+ },
+ {
+ "epoch": 396.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333609762257046,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6966270582247817,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1654,
+ "eval_samples_per_second": 985.392,
+ "eval_steps_per_second": 6.045,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10296
+ },
+ {
+ "epoch": 397.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333962857319209,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.268099308013916,
+ "eval_pr_auc": 0.6968255966064625,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.1974,
+ "eval_samples_per_second": 825.595,
+ "eval_steps_per_second": 5.065,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10322
+ },
+ {
+ "epoch": 398.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9334254248337986,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6987714987714988,
+ "eval_f1_macro": 0.8217158026492684,
+ "eval_loss": 0.26809167861938477,
+ "eval_pr_auc": 0.6970114900505864,
+ "eval_precision": 0.7100532623169108,
+ "eval_precision_macro": 0.8259819840149124,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6878426314092229,
+ "eval_recall_macro": 0.8176341182639161,
+ "eval_runtime": 0.1847,
+ "eval_samples_per_second": 882.621,
+ "eval_steps_per_second": 5.415,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10348
+ },
+ {
+ "epoch": 399.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334154587548664,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994928840176673,
+ "eval_f1_macro": 0.8220999320609059,
+ "eval_loss": 0.26815035939216614,
+ "eval_pr_auc": 0.6968381832516852,
+ "eval_precision": 0.7098273572377158,
+ "eval_precision_macro": 0.8260051771779358,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183497683772306,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.281,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10374
+ },
+ {
+ "epoch": 400.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334343592678412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991976420501065,
+ "eval_f1_macro": 0.821957303073866,
+ "eval_loss": 0.26815110445022583,
+ "eval_pr_auc": 0.6969254623478301,
+ "eval_precision": 0.7102461743180306,
+ "eval_precision_macro": 0.826134970486347,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.817956594884361,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.319,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10400
+ },
+ {
+ "epoch": 401.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334508655860725,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.2681412398815155,
+ "eval_pr_auc": 0.6970213943546584,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2502,
+ "eval_samples_per_second": 651.598,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10426
+ },
+ {
+ "epoch": 402.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334578048578289,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6996235063021771,
+ "eval_f1_macro": 0.8221986674075668,
+ "eval_loss": 0.26816073060035706,
+ "eval_pr_auc": 0.697032431219364,
+ "eval_precision": 0.710438829787234,
+ "eval_precision_macro": 0.8262878422645654,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.818279071504806,
+ "eval_runtime": 0.2131,
+ "eval_samples_per_second": 764.767,
+ "eval_steps_per_second": 4.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10452
+ },
+ {
+ "epoch": 403.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334644910846125,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993945344460808,
+ "eval_f1_macro": 0.8220524214743572,
+ "eval_loss": 0.2681373655796051,
+ "eval_pr_auc": 0.6971117657586055,
+ "eval_precision": 0.7099667774086379,
+ "eval_precision_macro": 0.8260483424802825,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182187105462742,
+ "eval_runtime": 0.1917,
+ "eval_samples_per_second": 850.439,
+ "eval_steps_per_second": 5.217,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10478
+ },
+ {
+ "epoch": 403.84615384615387,
+ "grad_norm": 20065.328125,
+ "learning_rate": 1.0857058873879127e-07,
+ "loss": 0.1991,
+ "step": 10500
+ },
+ {
+ "epoch": 404.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9334760435608745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26816511154174805,
+ "eval_pr_auc": 0.6971952135976213,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.2491,
+ "eval_samples_per_second": 654.311,
+ "eval_steps_per_second": 4.014,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10504
+ },
+ {
+ "epoch": 405.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334837419675497,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.26816821098327637,
+ "eval_pr_auc": 0.6972179050703514,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.823,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10530
+ },
+ {
+ "epoch": 406.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334893478869489,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.2681950330734253,
+ "eval_pr_auc": 0.6972103120395237,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.636,
+ "eval_steps_per_second": 4.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10556
+ },
+ {
+ "epoch": 407.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334909050867821,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998363338788871,
+ "eval_f1_macro": 0.822319298583337,
+ "eval_loss": 0.2681744396686554,
+ "eval_pr_auc": 0.6972816477778223,
+ "eval_precision": 0.71053506148222,
+ "eval_precision_macro": 0.8263642352251727,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184403098150286,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.723,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10582
+ },
+ {
+ "epoch": 408.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.933494973271346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995090016366612,
+ "eval_f1_macro": 0.8221255355501671,
+ "eval_loss": 0.2681480348110199,
+ "eval_pr_auc": 0.697380910091478,
+ "eval_precision": 0.7102027251578598,
+ "eval_precision_macro": 0.8261680532566416,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182488910255401,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.704,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10608
+ },
+ {
+ "epoch": 409.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334896009319218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680969536304474,
+ "eval_pr_auc": 0.6974108729960042,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2261,
+ "eval_samples_per_second": 721.003,
+ "eval_steps_per_second": 4.423,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10634
+ },
+ {
+ "epoch": 410.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9334926569365942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26808932423591614,
+ "eval_pr_auc": 0.6974463532877748,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.923,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10660
+ },
+ {
+ "epoch": 411.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334912943867403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680690288543701,
+ "eval_pr_auc": 0.6974656777279113,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.181,
+ "eval_samples_per_second": 900.801,
+ "eval_steps_per_second": 5.526,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10686
+ },
+ {
+ "epoch": 412.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9334969197711376,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26803234219551086,
+ "eval_pr_auc": 0.6975509824558107,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1785,
+ "eval_samples_per_second": 913.403,
+ "eval_steps_per_second": 5.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10712
+ },
+ {
+ "epoch": 413.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9334973090710958,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26804181933403015,
+ "eval_pr_auc": 0.697539016898834,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1858,
+ "eval_samples_per_second": 877.163,
+ "eval_steps_per_second": 5.381,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10738
+ },
+ {
+ "epoch": 414.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335163069090602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998363338788871,
+ "eval_f1_macro": 0.822319298583337,
+ "eval_loss": 0.2680352032184601,
+ "eval_pr_auc": 0.697653341121327,
+ "eval_precision": 0.71053506148222,
+ "eval_precision_macro": 0.8263642352251727,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184403098150286,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 682.004,
+ "eval_steps_per_second": 4.184,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10764
+ },
+ {
+ "epoch": 415.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335094357647963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2680439054965973,
+ "eval_pr_auc": 0.6975754183405896,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.063,
+ "eval_steps_per_second": 3.927,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10790
+ },
+ {
+ "epoch": 416.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335170855089767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.26802244782447815,
+ "eval_pr_auc": 0.6976835126920541,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.1858,
+ "eval_samples_per_second": 877.122,
+ "eval_steps_per_second": 5.381,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10816
+ },
+ {
+ "epoch": 417.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335191877287514,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.26805901527404785,
+ "eval_pr_auc": 0.6976076287719296,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.253,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10842
+ },
+ {
+ "epoch": 418.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335267206829443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002617801047121,
+ "eval_f1_macro": 0.8225604590386311,
+ "eval_loss": 0.26806166768074036,
+ "eval_pr_auc": 0.6976721144908643,
+ "eval_precision": 0.7107273331119229,
+ "eval_precision_macro": 0.8265169354519211,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187627864354736,
+ "eval_runtime": 0.1992,
+ "eval_samples_per_second": 818.364,
+ "eval_steps_per_second": 5.021,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10868
+ },
+ {
+ "epoch": 419.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9335306331475249,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7006869479882237,
+ "eval_f1_macro": 0.8228014837466855,
+ "eval_loss": 0.2680812180042267,
+ "eval_pr_auc": 0.6976481882085733,
+ "eval_precision": 0.7109193494855626,
+ "eval_precision_macro": 0.8266695216356063,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190852630559187,
+ "eval_runtime": 0.2498,
+ "eval_samples_per_second": 652.627,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10894
+ },
+ {
+ "epoch": 420.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335429739562026,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26813384890556335,
+ "eval_pr_auc": 0.6976080200264206,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2232,
+ "eval_samples_per_second": 730.143,
+ "eval_steps_per_second": 4.479,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10920
+ },
+ {
+ "epoch": 421.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9335431686061818,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.2681698799133301,
+ "eval_pr_auc": 0.6975308484626278,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.1913,
+ "eval_samples_per_second": 852.153,
+ "eval_steps_per_second": 5.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10946
+ },
+ {
+ "epoch": 422.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335561517597906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681558430194855,
+ "eval_pr_auc": 0.6975926126749412,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.252,
+ "eval_samples_per_second": 646.818,
+ "eval_steps_per_second": 3.968,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10972
+ },
+ {
+ "epoch": 423.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9335701957557857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.2681851089000702,
+ "eval_pr_auc": 0.6976584248764129,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.463,
+ "eval_steps_per_second": 3.972,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10998
+ },
+ {
+ "epoch": 423.0769230769231,
+ "grad_norm": 19880.513671875,
+ "learning_rate": 7.045132214180816e-08,
+ "loss": 0.198,
+ "step": 11000
+ },
+ {
+ "epoch": 424.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933567850223537,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.2681238353252411,
+ "eval_pr_auc": 0.6977041251052366,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.1802,
+ "eval_samples_per_second": 904.736,
+ "eval_steps_per_second": 5.551,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11024
+ },
+ {
+ "epoch": 425.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335591688344673,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26810285449028015,
+ "eval_pr_auc": 0.6976771787125668,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 673.038,
+ "eval_steps_per_second": 4.129,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11050
+ },
+ {
+ "epoch": 426.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335456990559106,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2680869400501251,
+ "eval_pr_auc": 0.6976467269760291,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.1727,
+ "eval_samples_per_second": 943.897,
+ "eval_steps_per_second": 5.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11076
+ },
+ {
+ "epoch": 427.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335531541501119,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004412485700278,
+ "eval_f1_macro": 0.8226293306702985,
+ "eval_loss": 0.26812946796417236,
+ "eval_pr_auc": 0.6976292182478663,
+ "eval_precision": 0.7100728959575878,
+ "eval_precision_macro": 0.8262676792100252,
+ "eval_pred_class_0": 16650,
+ "eval_pred_class_1": 3018,
+ "eval_predicted_binding_ratio": 0.15344722391702256,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8191257794490772,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.952,
+ "eval_steps_per_second": 3.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11102
+ },
+ {
+ "epoch": 428.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335772907475254,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26813971996307373,
+ "eval_pr_auc": 0.6977601654966087,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.2428,
+ "eval_samples_per_second": 671.434,
+ "eval_steps_per_second": 4.119,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11128
+ },
+ {
+ "epoch": 429.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9335880938213678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.26816368103027344,
+ "eval_pr_auc": 0.6977910817778257,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.1821,
+ "eval_samples_per_second": 895.286,
+ "eval_steps_per_second": 5.493,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11154
+ },
+ {
+ "epoch": 430.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9335897288811925,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993945344460808,
+ "eval_f1_macro": 0.8220524214743572,
+ "eval_loss": 0.2681582272052765,
+ "eval_pr_auc": 0.6978031299323104,
+ "eval_precision": 0.7099667774086379,
+ "eval_precision_macro": 0.8260483424802825,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182187105462742,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.36,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11180
+ },
+ {
+ "epoch": 431.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9336081038392237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990831696136215,
+ "eval_f1_macro": 0.8218841874311036,
+ "eval_loss": 0.2681788206100464,
+ "eval_pr_auc": 0.6978648412555615,
+ "eval_precision": 0.7100099767209843,
+ "eval_precision_macro": 0.8260151318092648,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8179264144050951,
+ "eval_runtime": 0.1932,
+ "eval_samples_per_second": 843.696,
+ "eval_steps_per_second": 5.176,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11206
+ },
+ {
+ "epoch": 432.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.933607091659332,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.26817184686660767,
+ "eval_pr_auc": 0.6978628373729787,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.2181,
+ "eval_samples_per_second": 747.26,
+ "eval_steps_per_second": 4.584,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11232
+ },
+ {
+ "epoch": 433.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336009699174881,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26814383268356323,
+ "eval_pr_auc": 0.6978726222778764,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.607,
+ "eval_steps_per_second": 3.875,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11258
+ },
+ {
+ "epoch": 434.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335997338901205,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26813551783561707,
+ "eval_pr_auc": 0.6978611289947059,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.1828,
+ "eval_samples_per_second": 891.596,
+ "eval_steps_per_second": 5.47,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11284
+ },
+ {
+ "epoch": 435.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335949455006336,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001308044473512,
+ "eval_f1_macro": 0.8224615491231337,
+ "eval_loss": 0.2681216299533844,
+ "eval_pr_auc": 0.6978496657032133,
+ "eval_precision": 0.7101160862354893,
+ "eval_precision_macro": 0.8262344077367322,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188334833078981,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.033,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11310
+ },
+ {
+ "epoch": 436.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933600045330087,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001308044473512,
+ "eval_f1_macro": 0.8224615491231337,
+ "eval_loss": 0.2681162655353546,
+ "eval_pr_auc": 0.6978775373770459,
+ "eval_precision": 0.7101160862354893,
+ "eval_precision_macro": 0.8262344077367322,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188334833078981,
+ "eval_runtime": 0.2578,
+ "eval_samples_per_second": 632.163,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11336
+ },
+ {
+ "epoch": 437.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336053398095198,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.26813197135925293,
+ "eval_pr_auc": 0.6979041519553001,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.1888,
+ "eval_samples_per_second": 863.236,
+ "eval_steps_per_second": 5.296,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11362
+ },
+ {
+ "epoch": 438.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933606196269428,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26813068985939026,
+ "eval_pr_auc": 0.6979019651404079,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2467,
+ "eval_samples_per_second": 660.854,
+ "eval_steps_per_second": 4.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11388
+ },
+ {
+ "epoch": 439.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336083958141924,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26813721656799316,
+ "eval_pr_auc": 0.6979009956604231,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1795,
+ "eval_samples_per_second": 908.065,
+ "eval_steps_per_second": 5.571,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11414
+ },
+ {
+ "epoch": 440.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336056512494865,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681269645690918,
+ "eval_pr_auc": 0.697889844022227,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.569,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11440
+ },
+ {
+ "epoch": 441.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336110917164033,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681290805339813,
+ "eval_pr_auc": 0.6979179015521837,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.824,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11466
+ },
+ {
+ "epoch": 442.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336152864234539,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681342363357544,
+ "eval_pr_auc": 0.697937589363129,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1746,
+ "eval_samples_per_second": 933.511,
+ "eval_steps_per_second": 5.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11492
+ },
+ {
+ "epoch": 442.3076923076923,
+ "grad_norm": 19259.90625,
+ "learning_rate": 4.0062918659231006e-08,
+ "loss": 0.1984,
+ "step": 11500
+ },
+ {
+ "epoch": 443.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336222548927073,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.26813840866088867,
+ "eval_pr_auc": 0.6979755820701472,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.189,
+ "eval_samples_per_second": 862.533,
+ "eval_steps_per_second": 5.292,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11518
+ },
+ {
+ "epoch": 444.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336241624625029,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.2681417763233185,
+ "eval_pr_auc": 0.6980183846984902,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.1783,
+ "eval_samples_per_second": 913.982,
+ "eval_steps_per_second": 5.607,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11544
+ },
+ {
+ "epoch": 445.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933623228142603,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26812103390693665,
+ "eval_pr_auc": 0.6980203083536239,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.813,
+ "eval_steps_per_second": 4.06,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11570
+ },
+ {
+ "epoch": 446.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933622994562628,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26812058687210083,
+ "eval_pr_auc": 0.6980222288630917,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.25,
+ "eval_samples_per_second": 651.994,
+ "eval_steps_per_second": 4.0,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11596
+ },
+ {
+ "epoch": 447.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336267026447306,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681138813495636,
+ "eval_pr_auc": 0.6980542679927909,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1865,
+ "eval_samples_per_second": 873.899,
+ "eval_steps_per_second": 5.361,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11622
+ },
+ {
+ "epoch": 448.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336341674714307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26811888813972473,
+ "eval_pr_auc": 0.6980920829430033,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.772,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11648
+ },
+ {
+ "epoch": 449.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336351017913307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001472272206772,
+ "eval_f1_macro": 0.8224873029219602,
+ "eval_loss": 0.26812025904655457,
+ "eval_pr_auc": 0.6980982354073687,
+ "eval_precision": 0.7104913678618858,
+ "eval_precision_macro": 0.8263972209146124,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187326059562077,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.262,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11674
+ },
+ {
+ "epoch": 450.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336350044663411,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681255340576172,
+ "eval_pr_auc": 0.69809285211945,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1865,
+ "eval_samples_per_second": 874.141,
+ "eval_steps_per_second": 5.363,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11700
+ },
+ {
+ "epoch": 451.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336387320134417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2681383192539215,
+ "eval_pr_auc": 0.6981051991963814,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.252,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11726
+ },
+ {
+ "epoch": 452.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336409802207007,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003598298985934,
+ "eval_f1_macro": 0.8226078241660808,
+ "eval_loss": 0.26812514662742615,
+ "eval_pr_auc": 0.6981289571890097,
+ "eval_precision": 0.7105874543644208,
+ "eval_precision_macro": 0.8264735530603251,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188938442664302,
+ "eval_runtime": 0.1751,
+ "eval_samples_per_second": 930.856,
+ "eval_steps_per_second": 5.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11752
+ },
+ {
+ "epoch": 453.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336424400955443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26812946796417236,
+ "eval_pr_auc": 0.6981354214954091,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.502,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11778
+ },
+ {
+ "epoch": 454.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336481628049311,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681373655796051,
+ "eval_pr_auc": 0.6981529233150263,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2333,
+ "eval_samples_per_second": 698.566,
+ "eval_steps_per_second": 4.286,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11804
+ },
+ {
+ "epoch": 455.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336448537552857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26815658807754517,
+ "eval_pr_auc": 0.6981319681162118,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1829,
+ "eval_samples_per_second": 891.289,
+ "eval_steps_per_second": 5.468,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11830
+ },
+ {
+ "epoch": 456.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933646138445148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.268153578042984,
+ "eval_pr_auc": 0.6981406301220767,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.693,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11856
+ },
+ {
+ "epoch": 457.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.933646138445148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.268160343170166,
+ "eval_pr_auc": 0.6981424953255787,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2524,
+ "eval_samples_per_second": 645.855,
+ "eval_steps_per_second": 3.962,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11882
+ },
+ {
+ "epoch": 458.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336505959296704,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.26815977692604065,
+ "eval_pr_auc": 0.6981608628032375,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.1785,
+ "eval_samples_per_second": 913.407,
+ "eval_steps_per_second": 5.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11908
+ },
+ {
+ "epoch": 459.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336521336645056,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26815950870513916,
+ "eval_pr_auc": 0.6981611753342029,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1894,
+ "eval_samples_per_second": 860.719,
+ "eval_steps_per_second": 5.28,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11934
+ },
+ {
+ "epoch": 460.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336499146547433,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2681548595428467,
+ "eval_pr_auc": 0.6981527417806164,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2094,
+ "eval_samples_per_second": 778.376,
+ "eval_steps_per_second": 4.775,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11960
+ },
+ {
+ "epoch": 461.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336533599593742,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681526839733124,
+ "eval_pr_auc": 0.6981686335311912,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.254,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11986
+ },
+ {
+ "epoch": 461.53846153846155,
+ "grad_norm": 19181.365234375,
+ "learning_rate": 1.7952297882945e-08,
+ "loss": 0.1977,
+ "step": 12000
+ },
+ {
+ "epoch": 462.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336556178991323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26815271377563477,
+ "eval_pr_auc": 0.6981807528411922,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2521,
+ "eval_samples_per_second": 646.614,
+ "eval_steps_per_second": 3.967,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12012
+ },
+ {
+ "epoch": 463.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336526397544513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.2681511640548706,
+ "eval_pr_auc": 0.6981676703517014,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2048,
+ "eval_samples_per_second": 795.899,
+ "eval_steps_per_second": 4.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12038
+ },
+ {
+ "epoch": 464.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336516859695536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26814842224121094,
+ "eval_pr_auc": 0.698161790632896,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.01,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12064
+ },
+ {
+ "epoch": 465.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336534183543679,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26814672350883484,
+ "eval_pr_auc": 0.6981725600302674,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1868,
+ "eval_samples_per_second": 872.776,
+ "eval_steps_per_second": 5.354,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12090
+ },
+ {
+ "epoch": 466.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336528344044305,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.2681439220905304,
+ "eval_pr_auc": 0.6981719802867735,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.256,
+ "eval_steps_per_second": 3.94,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12116
+ },
+ {
+ "epoch": 467.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336571167039716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26814955472946167,
+ "eval_pr_auc": 0.6981930050756842,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.332,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12142
+ },
+ {
+ "epoch": 468.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336574573414352,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26814743876457214,
+ "eval_pr_auc": 0.6981957415820915,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.1759,
+ "eval_samples_per_second": 926.564,
+ "eval_steps_per_second": 5.684,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12168
+ },
+ {
+ "epoch": 469.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336602894986317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681457996368408,
+ "eval_pr_auc": 0.6982076318844164,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.1997,
+ "eval_samples_per_second": 816.139,
+ "eval_steps_per_second": 5.007,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12194
+ },
+ {
+ "epoch": 470.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336571945639633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681419849395752,
+ "eval_pr_auc": 0.6981934072595471,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.578,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12220
+ },
+ {
+ "epoch": 471.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336578855713892,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681434154510498,
+ "eval_pr_auc": 0.6981968133129176,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.471,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12246
+ },
+ {
+ "epoch": 472.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336576422589153,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26814115047454834,
+ "eval_pr_auc": 0.6981937092453367,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.347,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12272
+ },
+ {
+ "epoch": 473.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933659540096212,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26814183592796326,
+ "eval_pr_auc": 0.698207050020266,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1807,
+ "eval_samples_per_second": 902.095,
+ "eval_steps_per_second": 5.534,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12298
+ },
+ {
+ "epoch": 474.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.933661126493542,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813554763793945,
+ "eval_pr_auc": 0.6982130837277154,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2369,
+ "eval_samples_per_second": 688.198,
+ "eval_steps_per_second": 4.222,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12324
+ },
+ {
+ "epoch": 475.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606204035962,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813259720802307,
+ "eval_pr_auc": 0.6982118437878516,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.1655,
+ "eval_samples_per_second": 984.988,
+ "eval_steps_per_second": 6.043,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12350
+ },
+ {
+ "epoch": 476.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.933660883181068,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813384890556335,
+ "eval_pr_auc": 0.6982130674055568,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2472,
+ "eval_samples_per_second": 659.287,
+ "eval_steps_per_second": 4.045,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12376
+ },
+ {
+ "epoch": 477.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336603284286276,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681376338005066,
+ "eval_pr_auc": 0.6982114384730127,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.234,
+ "eval_samples_per_second": 696.525,
+ "eval_steps_per_second": 4.273,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12402
+ },
+ {
+ "epoch": 478.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606982635879,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26814013719558716,
+ "eval_pr_auc": 0.6982139708732891,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2492,
+ "eval_samples_per_second": 654.084,
+ "eval_steps_per_second": 4.013,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12428
+ },
+ {
+ "epoch": 479.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606593335921,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813676953315735,
+ "eval_pr_auc": 0.6982155837128797,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2089,
+ "eval_samples_per_second": 780.114,
+ "eval_steps_per_second": 4.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12454
+ },
+ {
+ "epoch": 480.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606009385984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813599467277527,
+ "eval_pr_auc": 0.6982237203295948,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 682.114,
+ "eval_steps_per_second": 4.185,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12480
+ },
+ {
+ "epoch": 480.7692307692308,
+ "grad_norm": 19666.140625,
+ "learning_rate": 4.5173988392051e-09,
+ "loss": 0.1976,
+ "step": 12500
+ },
+ {
+ "epoch": 481.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336608247860743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813769340515137,
+ "eval_pr_auc": 0.6982221169303999,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2452,
+ "eval_samples_per_second": 664.649,
+ "eval_steps_per_second": 4.078,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12506
+ },
+ {
+ "epoch": 482.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336611459585399,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681364417076111,
+ "eval_pr_auc": 0.6982243970162039,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.641,
+ "eval_steps_per_second": 3.986,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12532
+ },
+ {
+ "epoch": 483.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336621192084356,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681383192539215,
+ "eval_pr_auc": 0.6982286439182355,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.229,
+ "eval_samples_per_second": 711.694,
+ "eval_steps_per_second": 4.366,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12558
+ },
+ {
+ "epoch": 484.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336621970684273,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681386172771454,
+ "eval_pr_auc": 0.6982328773712362,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.789,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12584
+ },
+ {
+ "epoch": 485.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336618856284606,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681376338005066,
+ "eval_pr_auc": 0.6982308470568848,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.4219,
+ "eval_samples_per_second": 386.39,
+ "eval_steps_per_second": 2.37,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12610
+ },
+ {
+ "epoch": 486.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336622749284189,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813745498657227,
+ "eval_pr_auc": 0.6982319234202713,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.256,
+ "eval_steps_per_second": 3.848,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12636
+ },
+ {
+ "epoch": 487.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626836933752,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813769340515137,
+ "eval_pr_auc": 0.6982353364927889,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.782,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12662
+ },
+ {
+ "epoch": 488.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336625766358866,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813805103302,
+ "eval_pr_auc": 0.6982371615828771,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.662,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12688
+ },
+ {
+ "epoch": 489.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626642283772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982364881625377,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.289,
+ "eval_steps_per_second": 3.793,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12714
+ },
+ {
+ "epoch": 490.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626642283772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982365330396263,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 632.058,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12740
+ },
+ {
+ "epoch": 491.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933662722623371,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982369774278672,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2559,
+ "eval_samples_per_second": 636.959,
+ "eval_steps_per_second": 3.908,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12766
+ },
+ {
+ "epoch": 492.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627420883689,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982367884435748,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1891,
+ "eval_samples_per_second": 862.094,
+ "eval_steps_per_second": 5.289,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12792
+ },
+ {
+ "epoch": 493.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336628199483605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982375574473259,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1955,
+ "eval_samples_per_second": 833.803,
+ "eval_steps_per_second": 5.115,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12818
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 13000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 500,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.0486892367874104e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/training_args.bin b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..43cdbe5adfb2a2b6cd48f66b6b5e6b0cc84c9bc3
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fce922531bcc60b40ec3cfe0214120623a297c18ab37c3a2e94007f715374c7
+size 5368
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/config.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/config.json
@@ -0,0 +1,52 @@
+{
+ "architectures": [
+ "GloMeModelForTokenClassification"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "cdr_weight": 0.0,
+ "class_weights": [
+ 0.1,
+ 0.9
+ ],
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.1,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "use_glome": true,
+ "use_nsa": true,
+ "vocab_size": 36
+}
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/model.safetensors b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d3c9641a30c32b394fa5068aea711c146e2e538
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca38aef4bec71d6d96a0e9bdfd2d7bbd821f572d5ce42d4d2f34673aec533b44
+size 61385376
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/optimizer.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c3990d4ad426a586e408f8c7e00a3499227df79
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d33f9cb88038c34fa64d87c5ddba2ba49323b162a432983750bfc5ffba921e20
+size 122881658
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/rng_state.pth b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a0f6c865946814c37bde90c90990950e7fca5e9b
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4867af6cbff014e37687b6a83467d119b6a2f6f101834ca5a80069323c97a0b
+size 14244
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scaler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scheduler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92e000eddb475f131fcea00cb4cde9f7d5d60e98
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10c756fdb9db8bf7a472800df8b97780cb11d7cca932ec4aba9d03b9e6436112
+size 1064
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/trainer_state.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e0cab5c6035a2020692bc76aabb88abe32709b51
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/trainer_state.json
@@ -0,0 +1,12185 @@
+{
+ "best_global_step": 12818,
+ "best_metric": 0.6982375574473259,
+ "best_model_checkpoint": "./results/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818",
+ "epoch": 499.0,
+ "eval_steps": 500,
+ "global_step": 12974,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.3402481187716087,
+ "eval_auc": 0.3906724936824889,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25902238465052535,
+ "eval_f1_macro": 0.3322238022959372,
+ "eval_loss": 1.061540961265564,
+ "eval_pr_auc": 0.12123677424188789,
+ "eval_precision": 0.15737977933523004,
+ "eval_precision_macro": 0.49946219326282143,
+ "eval_pred_class_0": 5257,
+ "eval_pred_class_1": 14411,
+ "eval_predicted_binding_ratio": 0.7327130364043116,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.49920692785748166,
+ "eval_runtime": 0.3106,
+ "eval_samples_per_second": 524.711,
+ "eval_steps_per_second": 3.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 26
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.34141753101484645,
+ "eval_auc": 0.39093619574173194,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25910884859577876,
+ "eval_f1_macro": 0.33318779271412513,
+ "eval_loss": 1.0595855712890625,
+ "eval_pr_auc": 0.12129083172780017,
+ "eval_precision": 0.15748852732582394,
+ "eval_precision_macro": 0.4996674570038125,
+ "eval_pred_class_0": 5286,
+ "eval_pred_class_1": 14382,
+ "eval_predicted_binding_ratio": 0.7312385600976204,
+ "eval_recall": 0.7304095453079652,
+ "eval_recall_macro": 0.4995079053877304,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.188,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 52
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.3436038234695953,
+ "eval_auc": 0.3913807276315981,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2594079853143644,
+ "eval_f1_macro": 0.3350089597864736,
+ "eval_loss": 1.0562976598739624,
+ "eval_pr_auc": 0.1213805792649038,
+ "eval_precision": 0.15776986951364175,
+ "eval_precision_macro": 0.5001890381857135,
+ "eval_pred_class_0": 5337,
+ "eval_pred_class_1": 14331,
+ "eval_predicted_binding_ratio": 0.7286455155582673,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.5002814346723429,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.877,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 78
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.345688427903193,
+ "eval_auc": 0.39204411422551294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25976416450963474,
+ "eval_f1_macro": 0.3367519287310599,
+ "eval_loss": 1.0516862869262695,
+ "eval_pr_auc": 0.1215177922821225,
+ "eval_precision": 0.15807896947633715,
+ "eval_precision_macro": 0.5007519661646174,
+ "eval_pred_class_0": 5384,
+ "eval_pred_class_1": 14284,
+ "eval_predicted_binding_ratio": 0.7262558470612162,
+ "eval_recall": 0.72815220896485,
+ "eval_recall_macro": 0.5011256608293798,
+ "eval_runtime": 0.2689,
+ "eval_samples_per_second": 606.218,
+ "eval_steps_per_second": 3.719,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 104
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.3489424445800285,
+ "eval_auc": 0.39286881698964193,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25986937171261776,
+ "eval_f1_macro": 0.33937421387990774,
+ "eval_loss": 1.0457645654678345,
+ "eval_pr_auc": 0.12168361829310792,
+ "eval_precision": 0.15830985915492957,
+ "eval_precision_macro": 0.5011556611063601,
+ "eval_pred_class_0": 5468,
+ "eval_pred_class_1": 14200,
+ "eval_predicted_binding_ratio": 0.7219849501728697,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.5017466331928395,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.89,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 130
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.3526540573520439,
+ "eval_auc": 0.3938679358675814,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2595952547103978,
+ "eval_f1_macro": 0.34226375201644554,
+ "eval_loss": 1.0385552644729614,
+ "eval_pr_auc": 0.12189495582289459,
+ "eval_precision": 0.15835402625044342,
+ "eval_precision_macro": 0.5012118238196412,
+ "eval_pred_class_0": 5573,
+ "eval_pred_class_1": 14095,
+ "eval_predicted_binding_ratio": 0.7166463290624364,
+ "eval_recall": 0.7197678168332796,
+ "eval_recall_macro": 0.5018528828839544,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.675,
+ "eval_steps_per_second": 3.728,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 156
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.3575859263778727,
+ "eval_auc": 0.39509779283079605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25933524825605253,
+ "eval_f1_macro": 0.34607916966826957,
+ "eval_loss": 1.0300335884094238,
+ "eval_pr_auc": 0.12215992714628282,
+ "eval_precision": 0.1584754262788365,
+ "eval_precision_macro": 0.5013918287261083,
+ "eval_pred_class_0": 5710,
+ "eval_pred_class_1": 13958,
+ "eval_predicted_binding_ratio": 0.7096806996135855,
+ "eval_recall": 0.7133182844243793,
+ "eval_recall_macro": 0.5021592327536275,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.797,
+ "eval_steps_per_second": 3.956,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 182
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.36261948342485256,
+ "eval_auc": 0.39656283563130934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2583126257247663,
+ "eval_f1_macro": 0.3497589695442054,
+ "eval_loss": 1.0202081203460693,
+ "eval_pr_auc": 0.12247236024679278,
+ "eval_precision": 0.1581769436997319,
+ "eval_precision_macro": 0.5008542806107318,
+ "eval_pred_class_0": 5867,
+ "eval_pred_class_1": 13801,
+ "eval_predicted_binding_ratio": 0.7016981899532235,
+ "eval_recall": 0.7039664624314738,
+ "eval_recall_macro": 0.5013464231032241,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.667,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 208
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.3690258287573724,
+ "eval_auc": 0.39822865015280895,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25848470363288717,
+ "eval_f1_macro": 0.3546848296925498,
+ "eval_loss": 1.0091010332107544,
+ "eval_pr_auc": 0.12282975659183427,
+ "eval_precision": 0.15863586358635864,
+ "eval_precision_macro": 0.5015788301853557,
+ "eval_pred_class_0": 6033,
+ "eval_pred_class_1": 13635,
+ "eval_predicted_binding_ratio": 0.6932580841976815,
+ "eval_recall": 0.6975169300225733,
+ "eval_recall_macro": 0.5025280068716114,
+ "eval_runtime": 0.2623,
+ "eval_samples_per_second": 621.417,
+ "eval_steps_per_second": 3.812,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 234
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.37553386211104334,
+ "eval_auc": 0.4001638991754374,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25788519637462237,
+ "eval_f1_macro": 0.35943500580602444,
+ "eval_loss": 0.9966734647750854,
+ "eval_pr_auc": 0.12325069957928089,
+ "eval_precision": 0.1586735073239646,
+ "eval_precision_macro": 0.5015911353953799,
+ "eval_pred_class_0": 6219,
+ "eval_pred_class_1": 13449,
+ "eval_predicted_binding_ratio": 0.6838010982306284,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.5025904311199223,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.918,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 260
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.38382143583485867,
+ "eval_auc": 0.4023744221985687,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25855001529519733,
+ "eval_f1_macro": 0.3657153538700335,
+ "eval_loss": 0.9828852415084839,
+ "eval_pr_auc": 0.12373084625745168,
+ "eval_precision": 0.15954394442766537,
+ "eval_precision_macro": 0.5028728439448414,
+ "eval_pred_class_0": 6424,
+ "eval_pred_class_1": 13244,
+ "eval_predicted_binding_ratio": 0.6733780760626398,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.5047576347901956,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.475,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 286
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.3912955053894651,
+ "eval_auc": 0.40482715792324586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2574122317330356,
+ "eval_f1_macro": 0.370844480646392,
+ "eval_loss": 0.9678097367286682,
+ "eval_pr_auc": 0.12427357405982056,
+ "eval_precision": 0.15935796021810922,
+ "eval_precision_macro": 0.5025013059703454,
+ "eval_pred_class_0": 6647,
+ "eval_pred_class_1": 13021,
+ "eval_predicted_binding_ratio": 0.6620398617042912,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.5042139676659523,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.751,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 312
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.4013117754728493,
+ "eval_auc": 0.40764224431659535,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2572383775941462,
+ "eval_f1_macro": 0.3779059068484294,
+ "eval_loss": 0.9513856172561646,
+ "eval_pr_auc": 0.12488748600523823,
+ "eval_precision": 0.15989648682559598,
+ "eval_precision_macro": 0.5031697587395765,
+ "eval_pred_class_0": 6916,
+ "eval_pred_class_1": 12752,
+ "eval_predicted_binding_ratio": 0.6483628228594671,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.5054414401669225,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.633,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 338
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.4099552572706935,
+ "eval_auc": 0.4108023769954713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25432114630855235,
+ "eval_f1_macro": 0.38308115532732967,
+ "eval_loss": 0.9335527420043945,
+ "eval_pr_auc": 0.12558462856716973,
+ "eval_precision": 0.15880276039159044,
+ "eval_precision_macro": 0.5015495900209409,
+ "eval_pred_class_0": 7206,
+ "eval_pred_class_1": 12462,
+ "eval_predicted_binding_ratio": 0.6336180597925565,
+ "eval_recall": 0.6381812318606901,
+ "eval_recall_macro": 0.5027086517847544,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.718,
+ "eval_steps_per_second": 3.728,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 364
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.41844620703681107,
+ "eval_auc": 0.4144857969457745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2516356974613975,
+ "eval_f1_macro": 0.3880413644466475,
+ "eval_loss": 0.9142351150512695,
+ "eval_pr_auc": 0.1264136402678906,
+ "eval_precision": 0.15784289583846342,
+ "eval_precision_macro": 0.5002307331563727,
+ "eval_pred_class_0": 7485,
+ "eval_pred_class_1": 12183,
+ "eval_predicted_binding_ratio": 0.6194325808419768,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.5004095532886144,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.136,
+ "eval_steps_per_second": 3.958,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 390
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.42897091722595077,
+ "eval_auc": 0.41858189431685716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24961582147390926,
+ "eval_f1_macro": 0.3943717007980979,
+ "eval_loss": 0.8934236168861389,
+ "eval_pr_auc": 0.12736412734017702,
+ "eval_precision": 0.15742457441429294,
+ "eval_precision_macro": 0.4996940867457263,
+ "eval_pred_class_0": 7802,
+ "eval_pred_class_1": 11866,
+ "eval_predicted_binding_ratio": 0.6033150294895261,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.4994487317940711,
+ "eval_runtime": 0.2416,
+ "eval_samples_per_second": 674.578,
+ "eval_steps_per_second": 4.139,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 416
+ },
+ {
+ "epoch": 17.0,
+ "eval_accuracy": 0.4378177750660972,
+ "eval_auc": 0.42318268015385996,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24520445081575534,
+ "eval_f1_macro": 0.3986584493314002,
+ "eval_loss": 0.8710600733757019,
+ "eval_pr_auc": 0.12844830521974454,
+ "eval_precision": 0.15552476619328023,
+ "eval_precision_macro": 0.4974052402394973,
+ "eval_pred_class_0": 8120,
+ "eval_pred_class_1": 11548,
+ "eval_predicted_binding_ratio": 0.5871466341264999,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.4952639713574891,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.27,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 442
+ },
+ {
+ "epoch": 18.0,
+ "eval_accuracy": 0.4492576774455969,
+ "eval_auc": 0.4283753771124365,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23911211014329867,
+ "eval_f1_macro": 0.4037791626413705,
+ "eval_loss": 0.8472632765769958,
+ "eval_pr_auc": 0.12969206942947384,
+ "eval_precision": 0.15285136955545578,
+ "eval_precision_macro": 0.4944498263457579,
+ "eval_pred_class_0": 8533,
+ "eval_pred_class_1": 11135,
+ "eval_predicted_binding_ratio": 0.5661480577587961,
+ "eval_recall": 0.5488552079974202,
+ "eval_recall_macro": 0.4897351430824307,
+ "eval_runtime": 0.281,
+ "eval_samples_per_second": 580.087,
+ "eval_steps_per_second": 3.559,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 468
+ },
+ {
+ "epoch": 19.0,
+ "eval_accuracy": 0.46339231238560097,
+ "eval_auc": 0.4341240430739382,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23588184187662903,
+ "eval_f1_macro": 0.41119432949496704,
+ "eval_loss": 0.8218646049499512,
+ "eval_pr_auc": 0.13110819406948146,
+ "eval_precision": 0.15208663990290355,
+ "eval_precision_macro": 0.4938729504080779,
+ "eval_pred_class_0": 8957,
+ "eval_pred_class_1": 10711,
+ "eval_predicted_binding_ratio": 0.544590197274761,
+ "eval_recall": 0.5253144147049339,
+ "eval_recall_macro": 0.4885580946585574,
+ "eval_runtime": 0.2447,
+ "eval_samples_per_second": 666.13,
+ "eval_steps_per_second": 4.087,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 494
+ },
+ {
+ "epoch": 19.23076923076923,
+ "grad_norm": 232728.109375,
+ "learning_rate": 3.8384615384615384e-07,
+ "loss": 0.99,
+ "step": 500
+ },
+ {
+ "epoch": 20.0,
+ "eval_accuracy": 0.4798657718120805,
+ "eval_auc": 0.44078220133048324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2309427153811457,
+ "eval_f1_macro": 0.4189975157915178,
+ "eval_loss": 0.7945711016654968,
+ "eval_pr_auc": 0.1327664236209388,
+ "eval_precision": 0.15057347318890305,
+ "eval_precision_macro": 0.49263119629657465,
+ "eval_pred_class_0": 9467,
+ "eval_pred_class_1": 10201,
+ "eval_predicted_binding_ratio": 0.5186597518812284,
+ "eval_recall": 0.49532408900354724,
+ "eval_recall_macro": 0.4861481916617905,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.734,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 520
+ },
+ {
+ "epoch": 21.0,
+ "eval_accuracy": 0.49964409192597115,
+ "eval_auc": 0.4482004774880778,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22407947646455886,
+ "eval_f1_macro": 0.42742637388305044,
+ "eval_loss": 0.765658974647522,
+ "eval_pr_auc": 0.1347110745909903,
+ "eval_precision": 0.14829889375913172,
+ "eval_precision_macro": 0.4908656872127009,
+ "eval_pred_class_0": 10086,
+ "eval_pred_class_1": 9582,
+ "eval_predicted_binding_ratio": 0.48718730933496035,
+ "eval_recall": 0.4582392776523702,
+ "eval_recall_macro": 0.48281674753627146,
+ "eval_runtime": 0.2494,
+ "eval_samples_per_second": 653.647,
+ "eval_steps_per_second": 4.01,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 546
+ },
+ {
+ "epoch": 22.0,
+ "eval_accuracy": 0.5294895261338214,
+ "eval_auc": 0.4563512312496838,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22025615099427032,
+ "eval_f1_macro": 0.44167751484473966,
+ "eval_loss": 0.7351489067077637,
+ "eval_pr_auc": 0.13684095333600696,
+ "eval_precision": 0.14908178396258698,
+ "eval_precision_macro": 0.49225486317659667,
+ "eval_pred_class_0": 10901,
+ "eval_pred_class_1": 8767,
+ "eval_predicted_binding_ratio": 0.4457494407158837,
+ "eval_recall": 0.4214769429216382,
+ "eval_recall_macro": 0.48559209613637894,
+ "eval_runtime": 0.2523,
+ "eval_samples_per_second": 645.993,
+ "eval_steps_per_second": 3.963,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 572
+ },
+ {
+ "epoch": 23.0,
+ "eval_accuracy": 0.564317673378076,
+ "eval_auc": 0.46539531162556536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21679919568595193,
+ "eval_f1_macro": 0.45751035677940843,
+ "eval_loss": 0.7033414244651794,
+ "eval_pr_auc": 0.1392772958743257,
+ "eval_precision": 0.15127551020408164,
+ "eval_precision_macro": 0.49468577674559844,
+ "eval_pred_class_0": 11828,
+ "eval_pred_class_1": 7840,
+ "eval_predicted_binding_ratio": 0.3986170429123449,
+ "eval_recall": 0.38245727184779105,
+ "eval_recall_macro": 0.49040772688786005,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.699,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 598
+ },
+ {
+ "epoch": 24.0,
+ "eval_accuracy": 0.6033150294895261,
+ "eval_auc": 0.4754341993823483,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21223747980613894,
+ "eval_f1_macro": 0.4735759293567254,
+ "eval_loss": 0.6706362962722778,
+ "eval_pr_auc": 0.14231930535250045,
+ "eval_precision": 0.1544906658826988,
+ "eval_precision_macro": 0.4975718001003078,
+ "eval_pred_class_0": 12865,
+ "eval_pred_class_1": 6803,
+ "eval_predicted_binding_ratio": 0.34589180394549524,
+ "eval_recall": 0.3389229280877136,
+ "eval_recall_macro": 0.49586334730576304,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.356,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 624
+ },
+ {
+ "epoch": 25.0,
+ "eval_accuracy": 0.6474984746796827,
+ "eval_auc": 0.4864891211002582,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2075665790376043,
+ "eval_f1_macro": 0.4904508280155492,
+ "eval_loss": 0.6374054551124573,
+ "eval_pr_auc": 0.14557281943245967,
+ "eval_precision": 0.16076487252124647,
+ "eval_precision_macro": 0.5021727358326632,
+ "eval_pred_class_0": 14020,
+ "eval_pred_class_1": 5648,
+ "eval_predicted_binding_ratio": 0.28716697173073014,
+ "eval_recall": 0.2928087713640761,
+ "eval_recall_macro": 0.5033489139611471,
+ "eval_runtime": 0.2341,
+ "eval_samples_per_second": 696.138,
+ "eval_steps_per_second": 4.271,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 650
+ },
+ {
+ "epoch": 26.0,
+ "eval_accuracy": 0.6925462680496237,
+ "eval_auc": 0.49869307137754393,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20360858685631503,
+ "eval_f1_macro": 0.5065549471155847,
+ "eval_loss": 0.6044979691505432,
+ "eval_pr_auc": 0.14935675594952297,
+ "eval_precision": 0.17208370436331255,
+ "eval_precision_macro": 0.5093417994668434,
+ "eval_pred_class_0": 15176,
+ "eval_pred_class_1": 4492,
+ "eval_predicted_binding_ratio": 0.22839129550538947,
+ "eval_recall": 0.2492744276039987,
+ "eval_recall_macro": 0.5123960114117054,
+ "eval_runtime": 0.2543,
+ "eval_samples_per_second": 640.94,
+ "eval_steps_per_second": 3.932,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 676
+ },
+ {
+ "epoch": 27.0,
+ "eval_accuracy": 0.7326113483831604,
+ "eval_auc": 0.5121322314924708,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1977116704805492,
+ "eval_f1_macro": 0.5186416868006297,
+ "eval_loss": 0.5727357268333435,
+ "eval_pr_auc": 0.15383837227298106,
+ "eval_precision": 0.18760856977417487,
+ "eval_precision_macro": 0.518159780138105,
+ "eval_pred_class_0": 16214,
+ "eval_pred_class_1": 3454,
+ "eval_predicted_binding_ratio": 0.1756152125279642,
+ "eval_recall": 0.20896485004837148,
+ "eval_recall_macro": 0.5197960002037596,
+ "eval_runtime": 0.2506,
+ "eval_samples_per_second": 650.309,
+ "eval_steps_per_second": 3.99,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 702
+ },
+ {
+ "epoch": 28.0,
+ "eval_accuracy": 0.7681513117754728,
+ "eval_auc": 0.5270188672472933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18947742623533595,
+ "eval_f1_macro": 0.5271029967130403,
+ "eval_loss": 0.5426873564720154,
+ "eval_pr_auc": 0.1589999639181036,
+ "eval_precision": 0.21108910891089108,
+ "eval_precision_macro": 0.5306451786169109,
+ "eval_pred_class_0": 17143,
+ "eval_pred_class_1": 2525,
+ "eval_predicted_binding_ratio": 0.12838112670327437,
+ "eval_recall": 0.17188003869719445,
+ "eval_recall_macro": 0.5258205046507038,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.973,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 728
+ },
+ {
+ "epoch": 29.0,
+ "eval_accuracy": 0.7986068741102298,
+ "eval_auc": 0.5437619187831915,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18076525336091004,
+ "eval_f1_macro": 0.5329784934669249,
+ "eval_loss": 0.5149086117744446,
+ "eval_pr_auc": 0.16564494894795073,
+ "eval_precision": 0.2520184544405998,
+ "eval_precision_macro": 0.5517368953367268,
+ "eval_pred_class_0": 17934,
+ "eval_pred_class_1": 1734,
+ "eval_predicted_binding_ratio": 0.08816351433801098,
+ "eval_recall": 0.14092228313447275,
+ "eval_recall_macro": 0.5313170599592205,
+ "eval_runtime": 0.258,
+ "eval_samples_per_second": 631.786,
+ "eval_steps_per_second": 3.876,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 754
+ },
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.8241814114297336,
+ "eval_auc": 0.5629712926123112,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17548879351454458,
+ "eval_f1_macro": 0.5385440097559633,
+ "eval_loss": 0.4897482395172119,
+ "eval_pr_auc": 0.17432371223202417,
+ "eval_precision": 0.3366880146386093,
+ "eval_precision_macro": 0.5947773855158054,
+ "eval_pred_class_0": 18575,
+ "eval_pred_class_1": 1093,
+ "eval_predicted_binding_ratio": 0.05557250355908074,
+ "eval_recall": 0.11867139632376653,
+ "eval_recall_macro": 0.5374548506940254,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.589,
+ "eval_steps_per_second": 3.899,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 780
+ },
+ {
+ "epoch": 31.0,
+ "eval_accuracy": 0.8372991661582265,
+ "eval_auc": 0.5843234707368185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16883116883116883,
+ "eval_f1_macro": 0.5393273806169032,
+ "eval_loss": 0.46770623326301575,
+ "eval_pr_auc": 0.18537280435724188,
+ "eval_precision": 0.43391188251001334,
+ "eval_precision_macro": 0.6435905413924347,
+ "eval_pred_class_0": 18919,
+ "eval_pred_class_1": 749,
+ "eval_predicted_binding_ratio": 0.038082163921090095,
+ "eval_recall": 0.10480490164463076,
+ "eval_recall_macro": 0.5396059276135269,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.108,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 806
+ },
+ {
+ "epoch": 32.0,
+ "eval_accuracy": 0.8421293471629042,
+ "eval_auc": 0.6080693861773249,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16012983500135244,
+ "eval_f1_macro": 0.5365030891665479,
+ "eval_loss": 0.44841739535331726,
+ "eval_pr_auc": 0.1997259509611161,
+ "eval_precision": 0.4966442953020134,
+ "eval_precision_macro": 0.6747850251677853,
+ "eval_pred_class_0": 19072,
+ "eval_pred_class_1": 596,
+ "eval_predicted_binding_ratio": 0.030303030303030304,
+ "eval_recall": 0.09545307965172525,
+ "eval_recall_macro": 0.5386723960460594,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.915,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 832
+ },
+ {
+ "epoch": 33.0,
+ "eval_accuracy": 0.8450782997762863,
+ "eval_auc": 0.6341019717128032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16497670594683475,
+ "eval_f1_macro": 0.5397977373430758,
+ "eval_loss": 0.4312308728694916,
+ "eval_pr_auc": 0.2181951536640109,
+ "eval_precision": 0.5492700729927007,
+ "eval_precision_macro": 0.7014132791741746,
+ "eval_pred_class_0": 19120,
+ "eval_pred_class_1": 548,
+ "eval_predicted_binding_ratio": 0.0278625177954037,
+ "eval_recall": 0.09706546275395034,
+ "eval_recall_macro": 0.5410781529982704,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.04,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 858
+ },
+ {
+ "epoch": 34.0,
+ "eval_accuracy": 0.8478238763473663,
+ "eval_auc": 0.6614343616815009,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17932547299149987,
+ "eval_f1_macro": 0.5477310488609045,
+ "eval_loss": 0.41584137082099915,
+ "eval_pr_auc": 0.24110190314317137,
+ "eval_precision": 0.5989010989010989,
+ "eval_precision_macro": 0.7269162957114008,
+ "eval_pred_class_0": 19122,
+ "eval_pred_class_1": 546,
+ "eval_predicted_binding_ratio": 0.027760829774252593,
+ "eval_recall": 0.1054498548855208,
+ "eval_recall_macro": 0.546115402483504,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.016,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 884
+ },
+ {
+ "epoch": 35.0,
+ "eval_accuracy": 0.8509253609924751,
+ "eval_auc": 0.6891114086357669,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20585048754062837,
+ "eval_f1_macro": 0.5617963020129356,
+ "eval_loss": 0.4015716016292572,
+ "eval_pr_auc": 0.2683830744239665,
+ "eval_precision": 0.6429780033840947,
+ "eval_precision_macro": 0.7501727569994856,
+ "eval_pred_class_0": 19077,
+ "eval_pred_class_1": 591,
+ "eval_predicted_binding_ratio": 0.030048810250152533,
+ "eval_recall": 0.12254111576910674,
+ "eval_recall_macro": 0.5549024767594251,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.872,
+ "eval_steps_per_second": 4.005,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 910
+ },
+ {
+ "epoch": 36.0,
+ "eval_accuracy": 0.8537217815741306,
+ "eval_auc": 0.7168296727231165,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2366675510745556,
+ "eval_f1_macro": 0.5778889812054533,
+ "eval_loss": 0.3880736827850342,
+ "eval_pr_auc": 0.2994175694348318,
+ "eval_precision": 0.6676646706586826,
+ "eval_precision_macro": 0.7639639142767097,
+ "eval_pred_class_0": 19000,
+ "eval_pred_class_1": 668,
+ "eval_predicted_binding_ratio": 0.0339637990644702,
+ "eval_recall": 0.1438245727184779,
+ "eval_recall_macro": 0.5652122199621845,
+ "eval_runtime": 0.2693,
+ "eval_samples_per_second": 605.227,
+ "eval_steps_per_second": 3.713,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 936
+ },
+ {
+ "epoch": 37.0,
+ "eval_accuracy": 0.8565690461663616,
+ "eval_auc": 0.743181046261935,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2768520892078954,
+ "eval_f1_macro": 0.59862076733571,
+ "eval_loss": 0.37574923038482666,
+ "eval_pr_auc": 0.33279138215623166,
+ "eval_precision": 0.675,
+ "eval_precision_macro": 0.7696337714649142,
+ "eval_pred_class_0": 18868,
+ "eval_pred_class_1": 800,
+ "eval_predicted_binding_ratio": 0.04067520846044336,
+ "eval_recall": 0.17413737504030957,
+ "eval_recall_macro": 0.5792217629109919,
+ "eval_runtime": 0.2774,
+ "eval_samples_per_second": 587.664,
+ "eval_steps_per_second": 3.605,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 962
+ },
+ {
+ "epoch": 38.0,
+ "eval_accuracy": 0.8596705308114704,
+ "eval_auc": 0.7678773986205973,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3205317577548006,
+ "eval_f1_macro": 0.6211435791665652,
+ "eval_loss": 0.36427780985832214,
+ "eval_pr_auc": 0.3671211589285648,
+ "eval_precision": 0.6774193548387096,
+ "eval_precision_macro": 0.7732261685723991,
+ "eval_pred_class_0": 18707,
+ "eval_pred_class_1": 961,
+ "eval_predicted_binding_ratio": 0.048861094163107584,
+ "eval_recall": 0.20993227990970656,
+ "eval_recall_macro": 0.5956101913823899,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.252,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 988
+ },
+ {
+ "epoch": 38.46153846153846,
+ "grad_norm": 35024.03515625,
+ "learning_rate": 7.684615384615384e-07,
+ "loss": 0.5725,
+ "step": 1000
+ },
+ {
+ "epoch": 39.0,
+ "eval_accuracy": 0.8642464917632703,
+ "eval_auc": 0.7904617013805764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3723554301833568,
+ "eval_f1_macro": 0.6481240123381296,
+ "eval_loss": 0.35397008061408997,
+ "eval_pr_auc": 0.40223746916130343,
+ "eval_precision": 0.6869037294015612,
+ "eval_precision_macro": 0.7810970172797707,
+ "eval_pred_class_0": 18515,
+ "eval_pred_class_1": 1153,
+ "eval_predicted_binding_ratio": 0.058623144193613995,
+ "eval_recall": 0.25540148339245405,
+ "eval_recall_macro": 0.6168055886811972,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.77,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1014
+ },
+ {
+ "epoch": 40.0,
+ "eval_accuracy": 0.867246288387228,
+ "eval_auc": 0.8102996097248453,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4125984251968504,
+ "eval_f1_macro": 0.6688826868467987,
+ "eval_loss": 0.3446972072124481,
+ "eval_pr_auc": 0.43559149314237056,
+ "eval_precision": 0.6822916666666666,
+ "eval_precision_macro": 0.7815518582187295,
+ "eval_pred_class_0": 18324,
+ "eval_pred_class_1": 1344,
+ "eval_predicted_binding_ratio": 0.06833435021354485,
+ "eval_recall": 0.29571106094808125,
+ "eval_recall_macro": 0.634968465827454,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.748,
+ "eval_steps_per_second": 3.949,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1040
+ },
+ {
+ "epoch": 41.0,
+ "eval_accuracy": 0.8715171852755745,
+ "eval_auc": 0.8272611461298317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4583065380493033,
+ "eval_f1_macro": 0.6927107089600444,
+ "eval_loss": 0.33654505014419556,
+ "eval_pr_auc": 0.4645782536288223,
+ "eval_precision": 0.6835038363171355,
+ "eval_precision_macro": 0.7856317237263981,
+ "eval_pred_class_0": 18104,
+ "eval_pred_class_1": 1564,
+ "eval_predicted_binding_ratio": 0.07952003254016676,
+ "eval_recall": 0.344727507255724,
+ "eval_recall_macro": 0.6574244163911867,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.484,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1066
+ },
+ {
+ "epoch": 42.0,
+ "eval_accuracy": 0.8743136058572301,
+ "eval_auc": 0.8416796876148132,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4898885678910442,
+ "eval_f1_macro": 0.7091078096051335,
+ "eval_loss": 0.329649955034256,
+ "eval_pr_auc": 0.49067495219464874,
+ "eval_precision": 0.6802292263610316,
+ "eval_precision_macro": 0.7867195342316791,
+ "eval_pred_class_0": 17923,
+ "eval_pred_class_1": 1745,
+ "eval_predicted_binding_ratio": 0.08872279845434208,
+ "eval_recall": 0.38277974846823604,
+ "eval_recall_macro": 0.674549166803684,
+ "eval_runtime": 0.2641,
+ "eval_samples_per_second": 617.276,
+ "eval_steps_per_second": 3.787,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1092
+ },
+ {
+ "epoch": 43.0,
+ "eval_accuracy": 0.8758897701850722,
+ "eval_auc": 0.8534597097025247,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5176842521240862,
+ "eval_f1_macro": 0.7232330815689724,
+ "eval_loss": 0.32387641072273254,
+ "eval_pr_auc": 0.5115876936649595,
+ "eval_precision": 0.6683673469387755,
+ "eval_precision_macro": 0.7836133097919538,
+ "eval_pred_class_0": 17708,
+ "eval_pred_class_1": 1960,
+ "eval_predicted_binding_ratio": 0.09965426072808622,
+ "eval_recall": 0.42244437278297325,
+ "eval_recall_macro": 0.6916048748685797,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.429,
+ "eval_steps_per_second": 3.8,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1118
+ },
+ {
+ "epoch": 44.0,
+ "eval_accuracy": 0.878991254830181,
+ "eval_auc": 0.863260959032272,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5468392993145469,
+ "eval_f1_macro": 0.7385059071387897,
+ "eval_loss": 0.3189404308795929,
+ "eval_pr_auc": 0.5291286431025274,
+ "eval_precision": 0.6675964667596467,
+ "eval_precision_macro": 0.7862729722049646,
+ "eval_pred_class_0": 17517,
+ "eval_pred_class_1": 2151,
+ "eval_predicted_binding_ratio": 0.10936546674801709,
+ "eval_recall": 0.4630764269590455,
+ "eval_recall_macro": 0.7099591708043251,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 630.993,
+ "eval_steps_per_second": 3.871,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1144
+ },
+ {
+ "epoch": 45.0,
+ "eval_accuracy": 0.8797030709782387,
+ "eval_auc": 0.8710211865407248,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5603864734299517,
+ "eval_f1_macro": 0.7453519808982827,
+ "eval_loss": 0.3149340748786926,
+ "eval_pr_auc": 0.5420378923897758,
+ "eval_precision": 0.6611135466900482,
+ "eval_precision_macro": 0.7847466853482449,
+ "eval_pred_class_0": 17387,
+ "eval_pred_class_1": 2281,
+ "eval_predicted_binding_ratio": 0.11597518812283913,
+ "eval_recall": 0.48629474363108677,
+ "eval_recall_macro": 0.719817861342917,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.753,
+ "eval_steps_per_second": 3.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1170
+ },
+ {
+ "epoch": 46.0,
+ "eval_accuracy": 0.8811775472849298,
+ "eval_auc": 0.8772876506442417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5748590140076405,
+ "eval_f1_macro": 0.7528983447354317,
+ "eval_loss": 0.3115498721599579,
+ "eval_pr_auc": 0.5526462799402374,
+ "eval_precision": 0.659432387312187,
+ "eval_precision_macro": 0.7856853923591968,
+ "eval_pred_class_0": 17272,
+ "eval_pred_class_1": 2396,
+ "eval_predicted_binding_ratio": 0.12182224933902787,
+ "eval_recall": 0.509513060303128,
+ "eval_recall_macro": 0.7301292590704993,
+ "eval_runtime": 0.247,
+ "eval_samples_per_second": 659.808,
+ "eval_steps_per_second": 4.048,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1196
+ },
+ {
+ "epoch": 47.0,
+ "eval_accuracy": 0.8817368314012609,
+ "eval_auc": 0.8824923380415335,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5840486409155937,
+ "eval_f1_macro": 0.7575589340187262,
+ "eval_loss": 0.3087034523487091,
+ "eval_pr_auc": 0.5616002050007283,
+ "eval_precision": 0.6555600160578081,
+ "eval_precision_macro": 0.7850484483851945,
+ "eval_pred_class_0": 17177,
+ "eval_pred_class_1": 2491,
+ "eval_predicted_binding_ratio": 0.12665243034370552,
+ "eval_recall": 0.526604321186714,
+ "eval_recall_macro": 0.7374073093831197,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.602,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1222
+ },
+ {
+ "epoch": 48.0,
+ "eval_accuracy": 0.8833638397396787,
+ "eval_auc": 0.8867719903429474,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5951288386869044,
+ "eval_f1_macro": 0.7634984852775182,
+ "eval_loss": 0.30617523193359375,
+ "eval_pr_auc": 0.5687331552143856,
+ "eval_precision": 0.6573099415204678,
+ "eval_precision_macro": 0.7872879591248483,
+ "eval_pred_class_0": 17103,
+ "eval_pred_class_1": 2565,
+ "eval_predicted_binding_ratio": 0.13041488712629654,
+ "eval_recall": 0.5436955820702999,
+ "eval_recall_macro": 0.7453191497603264,
+ "eval_runtime": 0.2573,
+ "eval_samples_per_second": 633.489,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1248
+ },
+ {
+ "epoch": 49.0,
+ "eval_accuracy": 0.8840248118771609,
+ "eval_auc": 0.8901050792607902,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6022667829119442,
+ "eval_f1_macro": 0.7671909492667516,
+ "eval_loss": 0.3041446805000305,
+ "eval_pr_auc": 0.5742293420515451,
+ "eval_precision": 0.6556567957479119,
+ "eval_precision_macro": 0.7874972953730754,
+ "eval_pred_class_0": 17034,
+ "eval_pred_class_1": 2634,
+ "eval_predicted_binding_ratio": 0.13392312385600977,
+ "eval_recall": 0.5569171235085456,
+ "eval_recall_macro": 0.751084867060001,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.934,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1274
+ },
+ {
+ "epoch": 50.0,
+ "eval_accuracy": 0.8846349400040675,
+ "eval_auc": 0.8931467576948593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6055970797844603,
+ "eval_f1_macro": 0.7690165668701654,
+ "eval_loss": 0.30221912264823914,
+ "eval_pr_auc": 0.5797467982851593,
+ "eval_precision": 0.6568627450980392,
+ "eval_precision_macro": 0.7884983683177079,
+ "eval_pred_class_0": 17016,
+ "eval_pred_class_1": 2652,
+ "eval_predicted_binding_ratio": 0.13483831604636973,
+ "eval_recall": 0.561754272815221,
+ "eval_recall_macro": 0.7534129002755408,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.805,
+ "eval_steps_per_second": 3.993,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1300
+ },
+ {
+ "epoch": 51.0,
+ "eval_accuracy": 0.8854484441732764,
+ "eval_auc": 0.8956789398085232,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6095997227516895,
+ "eval_f1_macro": 0.7712381155096151,
+ "eval_loss": 0.30062082409858704,
+ "eval_pr_auc": 0.5844826815319759,
+ "eval_precision": 0.6588014981273408,
+ "eval_precision_macro": 0.7899255166833903,
+ "eval_pred_class_0": 16998,
+ "eval_pred_class_1": 2670,
+ "eval_predicted_binding_ratio": 0.13575350823672971,
+ "eval_recall": 0.5672363753627861,
+ "eval_recall_macro": 0.7561237710700572,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.92,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1326
+ },
+ {
+ "epoch": 52.0,
+ "eval_accuracy": 0.8860077282896075,
+ "eval_auc": 0.8977014114868052,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6157010627356874,
+ "eval_f1_macro": 0.774389842453749,
+ "eval_loss": 0.2989857792854309,
+ "eval_pr_auc": 0.5879586440077966,
+ "eval_precision": 0.6571533113794366,
+ "eval_precision_macro": 0.7900469834133675,
+ "eval_pred_class_0": 16935,
+ "eval_pred_class_1": 2733,
+ "eval_predicted_binding_ratio": 0.13895668090298963,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.7613048960873738,
+ "eval_runtime": 0.2404,
+ "eval_samples_per_second": 677.963,
+ "eval_steps_per_second": 4.159,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1352
+ },
+ {
+ "epoch": 53.0,
+ "eval_accuracy": 0.8864144803742119,
+ "eval_auc": 0.8993954307902827,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6165465156196361,
+ "eval_f1_macro": 0.7749399244764847,
+ "eval_loss": 0.29775407910346985,
+ "eval_pr_auc": 0.5914083972949268,
+ "eval_precision": 0.6590825688073394,
+ "eval_precision_macro": 0.7910298047365505,
+ "eval_pred_class_0": 16943,
+ "eval_pred_class_1": 2725,
+ "eval_predicted_binding_ratio": 0.13854992881838518,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.7615463399215019,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.94,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1378
+ },
+ {
+ "epoch": 54.0,
+ "eval_accuracy": 0.8866178564165141,
+ "eval_auc": 0.9007296980023092,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6186730506155951,
+ "eval_f1_macro": 0.7760410164688105,
+ "eval_loss": 0.296587198972702,
+ "eval_pr_auc": 0.59415963293408,
+ "eval_precision": 0.6585365853658537,
+ "eval_precision_macro": 0.7910908800004612,
+ "eval_pred_class_0": 16921,
+ "eval_pred_class_1": 2747,
+ "eval_predicted_binding_ratio": 0.1396684970510474,
+ "eval_recall": 0.583360206385037,
+ "eval_recall_macro": 0.7633708136410005,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.694,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1404
+ },
+ {
+ "epoch": 55.0,
+ "eval_accuracy": 0.8872279845434208,
+ "eval_auc": 0.9019074471661075,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6215017064846416,
+ "eval_f1_macro": 0.7776226419864958,
+ "eval_loss": 0.2955063581466675,
+ "eval_pr_auc": 0.5967231989416606,
+ "eval_precision": 0.6600217470097861,
+ "eval_precision_macro": 0.7921612076464745,
+ "eval_pred_class_0": 16909,
+ "eval_pred_class_1": 2759,
+ "eval_predicted_binding_ratio": 0.14027862517795403,
+ "eval_recall": 0.5872299258303773,
+ "eval_recall_macro": 0.7653056733636705,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.741,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1430
+ },
+ {
+ "epoch": 56.0,
+ "eval_accuracy": 0.8881431767337807,
+ "eval_auc": 0.9030401348597343,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6235455167693361,
+ "eval_f1_macro": 0.778929094226063,
+ "eval_loss": 0.29450690746307373,
+ "eval_pr_auc": 0.5995879576354929,
+ "eval_precision": 0.6642362376959533,
+ "eval_precision_macro": 0.7943337761596458,
+ "eval_pred_class_0": 16925,
+ "eval_pred_class_1": 2743,
+ "eval_predicted_binding_ratio": 0.13946512100874517,
+ "eval_recall": 0.5875524024508223,
+ "eval_recall_macro": 0.7659799798214153,
+ "eval_runtime": 0.2658,
+ "eval_samples_per_second": 613.135,
+ "eval_steps_per_second": 3.762,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1456
+ },
+ {
+ "epoch": 57.0,
+ "eval_accuracy": 0.8882957087655075,
+ "eval_auc": 0.9040587382005859,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6243802359377671,
+ "eval_f1_macro": 0.7793863433697854,
+ "eval_loss": 0.2936408519744873,
+ "eval_pr_auc": 0.6024898616264603,
+ "eval_precision": 0.6644832605531296,
+ "eval_precision_macro": 0.7945643253120258,
+ "eval_pred_class_0": 16920,
+ "eval_pred_class_1": 2748,
+ "eval_predicted_binding_ratio": 0.13971934106162295,
+ "eval_recall": 0.5888423089326024,
+ "eval_recall_macro": 0.7665947525830392,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.369,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1482
+ },
+ {
+ "epoch": 57.69230769230769,
+ "grad_norm": 15613.5302734375,
+ "learning_rate": 9.992863736980368e-07,
+ "loss": 0.3115,
+ "step": 1500
+ },
+ {
+ "epoch": 58.0,
+ "eval_accuracy": 0.887888956680903,
+ "eval_auc": 0.9048886089216611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6246808510638298,
+ "eval_f1_macro": 0.7793916194591735,
+ "eval_loss": 0.29281434416770935,
+ "eval_pr_auc": 0.603713292882509,
+ "eval_precision": 0.6614996395097332,
+ "eval_precision_macro": 0.7932808958765667,
+ "eval_pred_class_0": 16894,
+ "eval_pred_class_1": 2774,
+ "eval_predicted_binding_ratio": 0.14104128533658736,
+ "eval_recall": 0.5917445985166075,
+ "eval_recall_macro": 0.7675328292275196,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.428,
+ "eval_steps_per_second": 3.972,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1508
+ },
+ {
+ "epoch": 59.0,
+ "eval_accuracy": 0.887888956680903,
+ "eval_auc": 0.9056672866982218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6251912289648138,
+ "eval_f1_macro": 0.7796389289833485,
+ "eval_loss": 0.2920655906200409,
+ "eval_pr_auc": 0.6054694565410179,
+ "eval_precision": 0.6610352264557872,
+ "eval_precision_macro": 0.7931493791878604,
+ "eval_pred_class_0": 16886,
+ "eval_pred_class_1": 2782,
+ "eval_predicted_binding_ratio": 0.14144803742119177,
+ "eval_recall": 0.5930345049983876,
+ "eval_recall_macro": 0.7680570605513457,
+ "eval_runtime": 0.235,
+ "eval_samples_per_second": 693.644,
+ "eval_steps_per_second": 4.255,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1534
+ },
+ {
+ "epoch": 60.0,
+ "eval_accuracy": 0.8882957087655075,
+ "eval_auc": 0.9063294664622661,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6254049445865303,
+ "eval_f1_macro": 0.7798830166450921,
+ "eval_loss": 0.29136955738067627,
+ "eval_pr_auc": 0.6071731602747702,
+ "eval_precision": 0.6635311143270622,
+ "eval_precision_macro": 0.7942892202018652,
+ "eval_pred_class_0": 16904,
+ "eval_pred_class_1": 2764,
+ "eval_predicted_binding_ratio": 0.14053284523083182,
+ "eval_recall": 0.5914221218961625,
+ "eval_recall_macro": 0.7676432152306912,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.702,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1560
+ },
+ {
+ "epoch": 61.0,
+ "eval_accuracy": 0.8886007728289608,
+ "eval_auc": 0.9070085321120007,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6269368295589988,
+ "eval_f1_macro": 0.7807307642400976,
+ "eval_loss": 0.290680855512619,
+ "eval_pr_auc": 0.6088679721523397,
+ "eval_precision": 0.6641414141414141,
+ "eval_precision_macro": 0.7947837752525253,
+ "eval_pred_class_0": 16896,
+ "eval_pred_class_1": 2772,
+ "eval_predicted_binding_ratio": 0.14093959731543623,
+ "eval_recall": 0.5936794582392777,
+ "eval_recall_macro": 0.7687417029229828,
+ "eval_runtime": 0.2486,
+ "eval_samples_per_second": 655.642,
+ "eval_steps_per_second": 4.022,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1586
+ },
+ {
+ "epoch": 62.0,
+ "eval_accuracy": 0.8887533048606874,
+ "eval_auc": 0.9076136113046634,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6273841961852861,
+ "eval_f1_macro": 0.7810002501366307,
+ "eval_loss": 0.29004454612731934,
+ "eval_pr_auc": 0.6100991712198425,
+ "eval_precision": 0.664741970407795,
+ "eval_precision_macro": 0.7951158511564335,
+ "eval_pred_class_0": 16897,
+ "eval_pred_class_1": 2771,
+ "eval_predicted_binding_ratio": 0.1408887533048607,
+ "eval_recall": 0.5940019348597226,
+ "eval_recall_macro": 0.7689633021917374,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.871,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1612
+ },
+ {
+ "epoch": 63.0,
+ "eval_accuracy": 0.8888041488712629,
+ "eval_auc": 0.9081136281710841,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6286296484971982,
+ "eval_f1_macro": 0.781621309135136,
+ "eval_loss": 0.28950682282447815,
+ "eval_pr_auc": 0.6111782063777282,
+ "eval_precision": 0.6639167862266858,
+ "eval_precision_macro": 0.794932326762632,
+ "eval_pred_class_0": 16880,
+ "eval_pred_class_1": 2788,
+ "eval_predicted_binding_ratio": 0.14175310148464512,
+ "eval_recall": 0.5969042244437278,
+ "eval_recall_macro": 0.7701730031496119,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.907,
+ "eval_steps_per_second": 4.018,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1638
+ },
+ {
+ "epoch": 64.0,
+ "eval_accuracy": 0.889363432987594,
+ "eval_auc": 0.9086206913667498,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6299319727891156,
+ "eval_f1_macro": 0.7824456611913058,
+ "eval_loss": 0.2888965606689453,
+ "eval_pr_auc": 0.6126297306007413,
+ "eval_precision": 0.6664267722202231,
+ "eval_precision_macro": 0.7962366556938643,
+ "eval_pred_class_0": 16889,
+ "eval_pred_class_1": 2779,
+ "eval_predicted_binding_ratio": 0.14129550538946511,
+ "eval_recall": 0.5972267010641729,
+ "eval_recall_macro": 0.7706360462524945,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.362,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1664
+ },
+ {
+ "epoch": 65.0,
+ "eval_accuracy": 0.8900244051250763,
+ "eval_auc": 0.9091278518874051,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6305721605465414,
+ "eval_f1_macro": 0.782984177701663,
+ "eval_loss": 0.2884848117828369,
+ "eval_pr_auc": 0.6142560104629078,
+ "eval_precision": 0.6702977487291213,
+ "eval_precision_macro": 0.7980494301171916,
+ "eval_pred_class_0": 16914,
+ "eval_pred_class_1": 2754,
+ "eval_predicted_binding_ratio": 0.14002440512507627,
+ "eval_recall": 0.5952918413415027,
+ "eval_recall_macro": 0.7702420454972136,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 614.992,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1690
+ },
+ {
+ "epoch": 66.0,
+ "eval_accuracy": 0.8905836892414074,
+ "eval_auc": 0.9094545718773954,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6327645051194539,
+ "eval_f1_macro": 0.78423982216183,
+ "eval_loss": 0.2880232632160187,
+ "eval_pr_auc": 0.6147358252333397,
+ "eval_precision": 0.6719826023921711,
+ "eval_precision_macro": 0.7991174470355793,
+ "eval_pred_class_0": 16909,
+ "eval_pred_class_1": 2759,
+ "eval_predicted_binding_ratio": 0.14027862517795403,
+ "eval_recall": 0.5978716543050628,
+ "eval_recall_macro": 0.7716224934167917,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.665,
+ "eval_steps_per_second": 3.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1716
+ },
+ {
+ "epoch": 67.0,
+ "eval_accuracy": 0.8908887533048607,
+ "eval_auc": 0.9099424231201196,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6360244233378561,
+ "eval_f1_macro": 0.7859248910947654,
+ "eval_loss": 0.28752708435058594,
+ "eval_pr_auc": 0.6159928290925853,
+ "eval_precision": 0.6708407871198568,
+ "eval_precision_macro": 0.7990901618287602,
+ "eval_pred_class_0": 16873,
+ "eval_pred_class_1": 2795,
+ "eval_predicted_binding_ratio": 0.14210900955867398,
+ "eval_recall": 0.6046436633344082,
+ "eval_recall_macro": 0.7745557907424743,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.527,
+ "eval_steps_per_second": 4.046,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1742
+ },
+ {
+ "epoch": 68.0,
+ "eval_accuracy": 0.8910412853365873,
+ "eval_auc": 0.910346516476819,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6362247496180614,
+ "eval_f1_macro": 0.7860747010162366,
+ "eval_loss": 0.2870826721191406,
+ "eval_pr_auc": 0.6168347475575285,
+ "eval_precision": 0.6716845878136201,
+ "eval_precision_macro": 0.7994932004123201,
+ "eval_pred_class_0": 16878,
+ "eval_pred_class_1": 2790,
+ "eval_predicted_binding_ratio": 0.14185478950579622,
+ "eval_recall": 0.6043211867139633,
+ "eval_recall_macro": 0.7745152743493158,
+ "eval_runtime": 0.266,
+ "eval_samples_per_second": 612.866,
+ "eval_steps_per_second": 3.76,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1768
+ },
+ {
+ "epoch": 69.0,
+ "eval_accuracy": 0.8910921293471629,
+ "eval_auc": 0.9107640601470772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6352179836512262,
+ "eval_f1_macro": 0.7856044496310159,
+ "eval_loss": 0.28665614128112793,
+ "eval_pr_auc": 0.6181373929491851,
+ "eval_precision": 0.673042223024179,
+ "eval_precision_macro": 0.7999465716529429,
+ "eval_pred_class_0": 16897,
+ "eval_pred_class_1": 2771,
+ "eval_predicted_binding_ratio": 0.1408887533048607,
+ "eval_recall": 0.6014188971299581,
+ "eval_recall_macro": 0.7733659343499732,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.359,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1794
+ },
+ {
+ "epoch": 70.0,
+ "eval_accuracy": 0.8910412853365873,
+ "eval_auc": 0.9111344401273891,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6354822248681748,
+ "eval_f1_macro": 0.7857149295725039,
+ "eval_loss": 0.28624698519706726,
+ "eval_pr_auc": 0.6190938884927122,
+ "eval_precision": 0.6724262059035278,
+ "eval_precision_macro": 0.7997122148522967,
+ "eval_pred_class_0": 16890,
+ "eval_pred_class_1": 2778,
+ "eval_predicted_binding_ratio": 0.14124466137888958,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.7737289273635768,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.866,
+ "eval_steps_per_second": 3.742,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1820
+ },
+ {
+ "epoch": 71.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9114138601724477,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.63642541624193,
+ "eval_f1_macro": 0.7862246662674524,
+ "eval_loss": 0.2858646512031555,
+ "eval_pr_auc": 0.6197061363545492,
+ "eval_precision": 0.6725314183123878,
+ "eval_precision_macro": 0.7998977650704271,
+ "eval_pred_class_0": 16883,
+ "eval_pred_class_1": 2785,
+ "eval_predicted_binding_ratio": 0.14160056945291843,
+ "eval_recall": 0.6039987100935182,
+ "eval_recall_macro": 0.7744747579561573,
+ "eval_runtime": 0.2386,
+ "eval_samples_per_second": 683.037,
+ "eval_steps_per_second": 4.19,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1846
+ },
+ {
+ "epoch": 72.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9118766988928523,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6373514431239389,
+ "eval_f1_macro": 0.7867436519572335,
+ "eval_loss": 0.2853938341140747,
+ "eval_pr_auc": 0.6212208808374569,
+ "eval_precision": 0.6730010756543564,
+ "eval_precision_macro": 0.8002424656665053,
+ "eval_pred_class_0": 16879,
+ "eval_pred_class_1": 2789,
+ "eval_predicted_binding_ratio": 0.14180394549522066,
+ "eval_recall": 0.6052886165752983,
+ "eval_recall_macro": 0.7751197111970474,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.683,
+ "eval_steps_per_second": 4.016,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1872
+ },
+ {
+ "epoch": 73.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9121406831945651,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.637411047102677,
+ "eval_f1_macro": 0.7867021736679862,
+ "eval_loss": 0.2850610911846161,
+ "eval_pr_auc": 0.6219405042066507,
+ "eval_precision": 0.6715458764726884,
+ "eval_precision_macro": 0.79960764506032,
+ "eval_pred_class_0": 16867,
+ "eval_pred_class_1": 2801,
+ "eval_predicted_binding_ratio": 0.14241407362212732,
+ "eval_recall": 0.6065785230570784,
+ "eval_recall_macro": 0.7755232206038093,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 618.95,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1898
+ },
+ {
+ "epoch": 74.0,
+ "eval_accuracy": 0.8912446613788896,
+ "eval_auc": 0.9124869655074592,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.637026981164093,
+ "eval_f1_macro": 0.7865337040796394,
+ "eval_loss": 0.284681111574173,
+ "eval_pr_auc": 0.6229948438184316,
+ "eval_precision": 0.6722779369627507,
+ "eval_precision_macro": 0.7998744508231626,
+ "eval_pred_class_0": 16876,
+ "eval_pred_class_1": 2792,
+ "eval_predicted_binding_ratio": 0.14195647752694732,
+ "eval_recall": 0.6052886165752983,
+ "eval_recall_macro": 0.7750291697592493,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.745,
+ "eval_steps_per_second": 3.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1924
+ },
+ {
+ "epoch": 75.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9128360118500571,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6385786802030456,
+ "eval_f1_macro": 0.7873381643700563,
+ "eval_loss": 0.2842992842197418,
+ "eval_pr_auc": 0.6238239183047751,
+ "eval_precision": 0.6717693129227483,
+ "eval_precision_macro": 0.7998801484834395,
+ "eval_pred_class_0": 16859,
+ "eval_pred_class_1": 2809,
+ "eval_predicted_binding_ratio": 0.14282082570673174,
+ "eval_recall": 0.6085133827797484,
+ "eval_recall_macro": 0.7764302895066123,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.282,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1950
+ },
+ {
+ "epoch": 76.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9131794425407568,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.638700947225981,
+ "eval_f1_macro": 0.7873973860112672,
+ "eval_loss": 0.2839708924293518,
+ "eval_pr_auc": 0.6248797725776689,
+ "eval_precision": 0.671647100675916,
+ "eval_precision_macro": 0.7998444318708524,
+ "eval_pred_class_0": 16857,
+ "eval_pred_class_1": 2811,
+ "eval_predicted_binding_ratio": 0.14292251372788287,
+ "eval_recall": 0.6088358594001935,
+ "eval_recall_macro": 0.7765613473375688,
+ "eval_runtime": 0.2644,
+ "eval_samples_per_second": 616.603,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1976
+ },
+ {
+ "epoch": 76.92307692307692,
+ "grad_norm": 18483.060546875,
+ "learning_rate": 9.912189372587507e-07,
+ "loss": 0.2796,
+ "step": 2000
+ },
+ {
+ "epoch": 77.0,
+ "eval_accuracy": 0.891651413463494,
+ "eval_auc": 0.9134005357195656,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6389971201084195,
+ "eval_f1_macro": 0.7876288504858192,
+ "eval_loss": 0.28368592262268066,
+ "eval_pr_auc": 0.6256253637409228,
+ "eval_precision": 0.6730906495360457,
+ "eval_precision_macro": 0.8005261145225586,
+ "eval_pred_class_0": 16866,
+ "eval_pred_class_1": 2802,
+ "eval_predicted_binding_ratio": 0.14246491763270286,
+ "eval_recall": 0.6081909061593035,
+ "eval_recall_macro": 0.7764501340719858,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.408,
+ "eval_steps_per_second": 3.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2002
+ },
+ {
+ "epoch": 78.0,
+ "eval_accuracy": 0.8918547895057962,
+ "eval_auc": 0.9135784263355038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6399187404773997,
+ "eval_f1_macro": 0.788145675542478,
+ "eval_loss": 0.283357173204422,
+ "eval_pr_auc": 0.6259773419133142,
+ "eval_precision": 0.6735566642908054,
+ "eval_precision_macro": 0.8008691873227245,
+ "eval_pred_class_0": 16862,
+ "eval_pred_class_1": 2806,
+ "eval_predicted_binding_ratio": 0.14266829367500508,
+ "eval_recall": 0.6094808126410836,
+ "eval_recall_macro": 0.7770950873128759,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.804,
+ "eval_steps_per_second": 4.152,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2028
+ },
+ {
+ "epoch": 79.0,
+ "eval_accuracy": 0.8920073215375229,
+ "eval_auc": 0.9139080660751812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6396335256192739,
+ "eval_f1_macro": 0.788060288914535,
+ "eval_loss": 0.28298139572143555,
+ "eval_pr_auc": 0.6270064177031266,
+ "eval_precision": 0.6749015395631937,
+ "eval_precision_macro": 0.8014211401519672,
+ "eval_pred_class_0": 16875,
+ "eval_pred_class_1": 2793,
+ "eval_predicted_binding_ratio": 0.14200732153752288,
+ "eval_recall": 0.6078684295388584,
+ "eval_recall_macro": 0.7765303395958915,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.262,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2054
+ },
+ {
+ "epoch": 80.0,
+ "eval_accuracy": 0.8923123856009763,
+ "eval_auc": 0.9140997087121456,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6410169491525424,
+ "eval_f1_macro": 0.7888360257187523,
+ "eval_loss": 0.28268861770629883,
+ "eval_pr_auc": 0.6271691682976167,
+ "eval_precision": 0.6755984280100036,
+ "eval_precision_macro": 0.8019346102940528,
+ "eval_pred_class_0": 16869,
+ "eval_pred_class_1": 2799,
+ "eval_predicted_binding_ratio": 0.1423123856009762,
+ "eval_recall": 0.6098032892615285,
+ "eval_recall_macro": 0.7774977694572265,
+ "eval_runtime": 0.2121,
+ "eval_samples_per_second": 768.576,
+ "eval_steps_per_second": 4.715,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2080
+ },
+ {
+ "epoch": 81.0,
+ "eval_accuracy": 0.8924140736221273,
+ "eval_auc": 0.9143275951752264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6415989159891599,
+ "eval_f1_macro": 0.7891531311221224,
+ "eval_loss": 0.28239867091178894,
+ "eval_pr_auc": 0.6278526459152028,
+ "eval_precision": 0.6757046022119158,
+ "eval_precision_macro": 0.8020681327098713,
+ "eval_pred_class_0": 16865,
+ "eval_pred_class_1": 2803,
+ "eval_predicted_binding_ratio": 0.14251576164327842,
+ "eval_recall": 0.6107707191228636,
+ "eval_recall_macro": 0.7779513039086281,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 617.069,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2106
+ },
+ {
+ "epoch": 82.0,
+ "eval_accuracy": 0.8925666056538539,
+ "eval_auc": 0.9145931950717662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.641317263622475,
+ "eval_f1_macro": 0.7890694555517069,
+ "eval_loss": 0.2821619510650635,
+ "eval_pr_auc": 0.6287354538303637,
+ "eval_precision": 0.6770609318996416,
+ "eval_precision_macro": 0.8026257379014738,
+ "eval_pred_class_0": 16878,
+ "eval_pred_class_1": 2790,
+ "eval_predicted_binding_ratio": 0.14185478950579622,
+ "eval_recall": 0.6091583360206385,
+ "eval_recall_macro": 0.7773865561916435,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.835,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2132
+ },
+ {
+ "epoch": 83.0,
+ "eval_accuracy": 0.8929733577384584,
+ "eval_auc": 0.9148203126674294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6442453946256549,
+ "eval_f1_macro": 0.7906286370477088,
+ "eval_loss": 0.2817782461643219,
+ "eval_pr_auc": 0.6293872239214393,
+ "eval_precision": 0.6768465909090909,
+ "eval_precision_macro": 0.8029675631972466,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.6146404385682038,
+ "eval_recall_macro": 0.7798559831520322,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.502,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2158
+ },
+ {
+ "epoch": 84.0,
+ "eval_accuracy": 0.8929225137278829,
+ "eval_auc": 0.9150136584917113,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6435341909275558,
+ "eval_f1_macro": 0.7902665569930348,
+ "eval_loss": 0.2815438210964203,
+ "eval_pr_auc": 0.6300492382313454,
+ "eval_precision": 0.677235482721767,
+ "eval_precision_macro": 0.8030326633702543,
+ "eval_pred_class_0": 16861,
+ "eval_pred_class_1": 2807,
+ "eval_predicted_binding_ratio": 0.14271913768558064,
+ "eval_recall": 0.6130280554659787,
+ "eval_recall_macro": 0.7791705135179836,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.685,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2184
+ },
+ {
+ "epoch": 85.0,
+ "eval_accuracy": 0.8928716697173072,
+ "eval_auc": 0.9151760160393141,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6437869822485207,
+ "eval_f1_macro": 0.7903713942390684,
+ "eval_loss": 0.28126296401023865,
+ "eval_pr_auc": 0.6304146488380505,
+ "eval_precision": 0.6766169154228856,
+ "eval_precision_macro": 0.8027975997548746,
+ "eval_pred_class_0": 16854,
+ "eval_pred_class_1": 2814,
+ "eval_predicted_binding_ratio": 0.14307504575960953,
+ "eval_recall": 0.6139954853273137,
+ "eval_recall_macro": 0.7795335065315872,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.417,
+ "eval_steps_per_second": 3.917,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2210
+ },
+ {
+ "epoch": 86.0,
+ "eval_accuracy": 0.8930750457596095,
+ "eval_auc": 0.9154795142867925,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.643861134631668,
+ "eval_f1_macro": 0.7904777241463208,
+ "eval_loss": 0.2809857428073883,
+ "eval_pr_auc": 0.6313964494387146,
+ "eval_precision": 0.677960057061341,
+ "eval_precision_macro": 0.803401280902587,
+ "eval_pred_class_0": 16864,
+ "eval_pred_class_1": 2804,
+ "eval_predicted_binding_ratio": 0.14256660565385398,
+ "eval_recall": 0.6130280554659787,
+ "eval_recall_macro": 0.7792610549557817,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.65,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2236
+ },
+ {
+ "epoch": 87.0,
+ "eval_accuracy": 0.8930750457596095,
+ "eval_auc": 0.9156233995513745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6458999831621485,
+ "eval_f1_macro": 0.7914651276711422,
+ "eval_loss": 0.28072693943977356,
+ "eval_pr_auc": 0.631527672626228,
+ "eval_precision": 0.6758280479210712,
+ "eval_precision_macro": 0.8027684505796682,
+ "eval_pred_class_0": 16830,
+ "eval_pred_class_1": 2838,
+ "eval_predicted_binding_ratio": 0.14429530201342283,
+ "eval_recall": 0.618510158013544,
+ "eval_recall_macro": 0.7814890380820421,
+ "eval_runtime": 0.2629,
+ "eval_samples_per_second": 620.026,
+ "eval_steps_per_second": 3.804,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2262
+ },
+ {
+ "epoch": 88.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9158623713307676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6449103821440649,
+ "eval_f1_macro": 0.7910387587819241,
+ "eval_loss": 0.28049618005752563,
+ "eval_pr_auc": 0.6324229662687507,
+ "eval_precision": 0.6779239246356203,
+ "eval_precision_macro": 0.8035422055690709,
+ "eval_pred_class_0": 16855,
+ "eval_pred_class_1": 2813,
+ "eval_predicted_binding_ratio": 0.14302420174903396,
+ "eval_recall": 0.6149629151886489,
+ "eval_recall_macro": 0.7801379433793187,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.715,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2288
+ },
+ {
+ "epoch": 89.0,
+ "eval_accuracy": 0.893125889770185,
+ "eval_auc": 0.9160425393514616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6452919338508268,
+ "eval_f1_macro": 0.7911883195144587,
+ "eval_loss": 0.28025364875793457,
+ "eval_pr_auc": 0.6329798450144843,
+ "eval_precision": 0.6768141592920354,
+ "eval_precision_macro": 0.8031105172758937,
+ "eval_pred_class_0": 16843,
+ "eval_pred_class_1": 2825,
+ "eval_predicted_binding_ratio": 0.1436343298759406,
+ "eval_recall": 0.6165752982908739,
+ "eval_recall_macro": 0.7807328715755691,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.097,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2314
+ },
+ {
+ "epoch": 90.0,
+ "eval_accuracy": 0.8936343298759406,
+ "eval_auc": 0.9161711835226769,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6463826910074375,
+ "eval_f1_macro": 0.7918927219250234,
+ "eval_loss": 0.2800801396369934,
+ "eval_pr_auc": 0.6332605675535015,
+ "eval_precision": 0.6792184724689165,
+ "eval_precision_macro": 0.8043336176502299,
+ "eval_pred_class_0": 16853,
+ "eval_pred_class_1": 2815,
+ "eval_predicted_binding_ratio": 0.14312588977018506,
+ "eval_recall": 0.6165752982908739,
+ "eval_recall_macro": 0.7810346763682292,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.634,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2340
+ },
+ {
+ "epoch": 91.0,
+ "eval_accuracy": 0.8934309538336384,
+ "eval_auc": 0.9163414730569294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6463044211947351,
+ "eval_f1_macro": 0.7917843566614202,
+ "eval_loss": 0.2798333764076233,
+ "eval_pr_auc": 0.633657166273441,
+ "eval_precision": 0.6778761061946903,
+ "eval_precision_macro": 0.8037305484960271,
+ "eval_pred_class_0": 16843,
+ "eval_pred_class_1": 2825,
+ "eval_predicted_binding_ratio": 0.1436343298759406,
+ "eval_recall": 0.617542728152209,
+ "eval_recall_macro": 0.7813071279440347,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.372,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2366
+ },
+ {
+ "epoch": 92.0,
+ "eval_accuracy": 0.8937868619076673,
+ "eval_auc": 0.9165959292421633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6469494676356261,
+ "eval_f1_macro": 0.7922200583338069,
+ "eval_loss": 0.27958908677101135,
+ "eval_pr_auc": 0.6345494466448222,
+ "eval_precision": 0.6796875,
+ "eval_precision_macro": 0.8046253782933777,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.617220251531764,
+ "eval_recall_macro": 0.7813873334679403,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.29,
+ "eval_steps_per_second": 3.855,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2392
+ },
+ {
+ "epoch": 93.0,
+ "eval_accuracy": 0.8938885499288184,
+ "eval_auc": 0.9168139566838005,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6482386650935446,
+ "eval_f1_macro": 0.7928796235385993,
+ "eval_loss": 0.27935075759887695,
+ "eval_pr_auc": 0.6350396647674293,
+ "eval_precision": 0.6790254237288136,
+ "eval_precision_macro": 0.8045281549625298,
+ "eval_pred_class_0": 16836,
+ "eval_pred_class_1": 2832,
+ "eval_predicted_binding_ratio": 0.14399023794996949,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.7826272149050808,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.307,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2418
+ },
+ {
+ "epoch": 94.0,
+ "eval_accuracy": 0.8941936139922717,
+ "eval_auc": 0.916949978089225,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6492499578628013,
+ "eval_f1_macro": 0.7934750822155368,
+ "eval_loss": 0.27906060218811035,
+ "eval_pr_auc": 0.6354743744677446,
+ "eval_precision": 0.6800847457627118,
+ "eval_precision_macro": 0.8051469107763429,
+ "eval_pred_class_0": 16836,
+ "eval_pred_class_1": 2832,
+ "eval_predicted_binding_ratio": 0.14399023794996949,
+ "eval_recall": 0.6210899709771042,
+ "eval_recall_macro": 0.7832014712735463,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.231,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2444
+ },
+ {
+ "epoch": 95.0,
+ "eval_accuracy": 0.894498678055725,
+ "eval_auc": 0.9171692902207247,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6496707749451291,
+ "eval_f1_macro": 0.7937845988573549,
+ "eval_loss": 0.2788851261138916,
+ "eval_pr_auc": 0.6362118552671664,
+ "eval_precision": 0.6817859673990078,
+ "eval_precision_macro": 0.8059588747122072,
+ "eval_pred_class_0": 16846,
+ "eval_pred_class_1": 2822,
+ "eval_predicted_binding_ratio": 0.14348179784421394,
+ "eval_recall": 0.6204450177362141,
+ "eval_recall_macro": 0.7831204384872295,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.948,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2470
+ },
+ {
+ "epoch": 96.0,
+ "eval_accuracy": 0.8946003660768761,
+ "eval_auc": 0.9172832383185145,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.64977192093259,
+ "eval_f1_macro": 0.7938688135051675,
+ "eval_loss": 0.27873092889785767,
+ "eval_pr_auc": 0.6365172336600542,
+ "eval_precision": 0.6823988644428672,
+ "eval_precision_macro": 0.8062439426071903,
+ "eval_pred_class_0": 16850,
+ "eval_pred_class_1": 2818,
+ "eval_predicted_binding_ratio": 0.14327842180191175,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.783049741614805,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 617.058,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2496
+ },
+ {
+ "epoch": 96.15384615384616,
+ "grad_norm": 12855.328125,
+ "learning_rate": 9.74310718484651e-07,
+ "loss": 0.268,
+ "step": 2500
+ },
+ {
+ "epoch": 97.0,
+ "eval_accuracy": 0.8948037421191783,
+ "eval_auc": 0.9174495472606937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6506837751139625,
+ "eval_f1_macro": 0.7943808843546348,
+ "eval_loss": 0.27859047055244446,
+ "eval_pr_auc": 0.636938752781715,
+ "eval_precision": 0.6828490432317506,
+ "eval_precision_macro": 0.8065794545376371,
+ "eval_pred_class_0": 16846,
+ "eval_pred_class_1": 2822,
+ "eval_predicted_binding_ratio": 0.14348179784421394,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.783694694855695,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.744,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2522
+ },
+ {
+ "epoch": 98.0,
+ "eval_accuracy": 0.8951596501932072,
+ "eval_auc": 0.9175816853990344,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6521592442645074,
+ "eval_f1_macro": 0.7952187504847441,
+ "eval_loss": 0.2782803475856781,
+ "eval_pr_auc": 0.6372336473067074,
+ "eval_precision": 0.6837637071100107,
+ "eval_precision_macro": 0.8072045778587877,
+ "eval_pred_class_0": 16841,
+ "eval_pred_class_1": 2827,
+ "eval_predicted_binding_ratio": 0.14373601789709173,
+ "eval_recall": 0.6233473073202193,
+ "eval_recall_macro": 0.784692305196296,
+ "eval_runtime": 0.2192,
+ "eval_samples_per_second": 743.723,
+ "eval_steps_per_second": 4.563,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2548
+ },
+ {
+ "epoch": 99.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.917689375499995,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6515867656988521,
+ "eval_f1_macro": 0.7949062764205981,
+ "eval_loss": 0.2782030701637268,
+ "eval_pr_auc": 0.6376582660543189,
+ "eval_precision": 0.683669854764435,
+ "eval_precision_macro": 0.8070768389286704,
+ "eval_pred_class_0": 16845,
+ "eval_pred_class_1": 2823,
+ "eval_predicted_binding_ratio": 0.1435326418547895,
+ "eval_recall": 0.6223798774588842,
+ "eval_recall_macro": 0.7842387707448946,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.364,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2574
+ },
+ {
+ "epoch": 100.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.917862049496492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6513513513513514,
+ "eval_f1_macro": 0.7947922665303561,
+ "eval_loss": 0.2779688835144043,
+ "eval_pr_auc": 0.6381115995039711,
+ "eval_precision": 0.6839304717985101,
+ "eval_precision_macro": 0.8071560484103832,
+ "eval_pred_class_0": 16849,
+ "eval_pred_class_1": 2819,
+ "eval_predicted_binding_ratio": 0.14332926581248728,
+ "eval_recall": 0.6217349242179941,
+ "eval_recall_macro": 0.7839766550829814,
+ "eval_runtime": 0.219,
+ "eval_samples_per_second": 744.395,
+ "eval_steps_per_second": 4.567,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2600
+ },
+ {
+ "epoch": 101.0,
+ "eval_accuracy": 0.8951088061826317,
+ "eval_auc": 0.9180189763096767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6513435862768294,
+ "eval_f1_macro": 0.7948061179237165,
+ "eval_loss": 0.27778077125549316,
+ "eval_pr_auc": 0.6385730633658938,
+ "eval_precision": 0.6843039772727273,
+ "eval_precision_macro": 0.8073193278245905,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.783875777731291,
+ "eval_runtime": 0.2529,
+ "eval_samples_per_second": 644.591,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2626
+ },
+ {
+ "epoch": 102.0,
+ "eval_accuracy": 0.895413870246085,
+ "eval_auc": 0.9182058500221522,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6518869521069555,
+ "eval_f1_macro": 0.7951749356520059,
+ "eval_loss": 0.2776651084423065,
+ "eval_pr_auc": 0.6390385071928153,
+ "eval_precision": 0.6858974358974359,
+ "eval_precision_macro": 0.8081029290993704,
+ "eval_pred_class_0": 16860,
+ "eval_pred_class_1": 2808,
+ "eval_predicted_binding_ratio": 0.1427699816961562,
+ "eval_recall": 0.6210899709771042,
+ "eval_recall_macro": 0.7839258027759306,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.358,
+ "eval_steps_per_second": 3.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2652
+ },
+ {
+ "epoch": 103.0,
+ "eval_accuracy": 0.8952104942037828,
+ "eval_auc": 0.9183596527031714,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6527379949452401,
+ "eval_f1_macro": 0.7955166277830898,
+ "eval_loss": 0.2773846685886383,
+ "eval_pr_auc": 0.6394513455183966,
+ "eval_precision": 0.6834862385321101,
+ "eval_precision_macro": 0.8071702310636076,
+ "eval_pred_class_0": 16834,
+ "eval_pred_class_1": 2834,
+ "eval_predicted_binding_ratio": 0.1440919259711206,
+ "eval_recall": 0.6246372138019993,
+ "eval_recall_macro": 0.785246716999388,
+ "eval_runtime": 0.2264,
+ "eval_samples_per_second": 719.926,
+ "eval_steps_per_second": 4.417,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2678
+ },
+ {
+ "epoch": 104.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9185293680199856,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6535353535353535,
+ "eval_f1_macro": 0.7959556034654849,
+ "eval_loss": 0.2772791385650635,
+ "eval_pr_auc": 0.6398373166129732,
+ "eval_precision": 0.6836914406481155,
+ "eval_precision_macro": 0.8073814027769664,
+ "eval_pred_class_0": 16829,
+ "eval_pred_class_1": 2839,
+ "eval_predicted_binding_ratio": 0.14434614602399837,
+ "eval_recall": 0.6259271202837794,
+ "eval_recall_macro": 0.7858614897610121,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.649,
+ "eval_steps_per_second": 3.857,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2704
+ },
+ {
+ "epoch": 105.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9186085224340037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6536519690339953,
+ "eval_f1_macro": 0.7960120658489734,
+ "eval_loss": 0.27707138657569885,
+ "eval_pr_auc": 0.6400560131071933,
+ "eval_precision": 0.6835621260119676,
+ "eval_precision_macro": 0.8073423632971826,
+ "eval_pred_class_0": 16827,
+ "eval_pred_class_1": 2841,
+ "eval_predicted_binding_ratio": 0.1444478340451495,
+ "eval_recall": 0.6262495969042244,
+ "eval_recall_macro": 0.7859925475919686,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.533,
+ "eval_steps_per_second": 3.985,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2730
+ },
+ {
+ "epoch": 106.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9187703544266627,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6535353535353535,
+ "eval_f1_macro": 0.7959556034654849,
+ "eval_loss": 0.27683117985725403,
+ "eval_pr_auc": 0.6407556071793965,
+ "eval_precision": 0.6836914406481155,
+ "eval_precision_macro": 0.8073814027769664,
+ "eval_pred_class_0": 16829,
+ "eval_pred_class_1": 2839,
+ "eval_predicted_binding_ratio": 0.14434614602399837,
+ "eval_recall": 0.6259271202837794,
+ "eval_recall_macro": 0.7858614897610121,
+ "eval_runtime": 0.3724,
+ "eval_samples_per_second": 437.667,
+ "eval_steps_per_second": 2.685,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2756
+ },
+ {
+ "epoch": 107.0,
+ "eval_accuracy": 0.8955664022778117,
+ "eval_auc": 0.918983612943811,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6543251430494783,
+ "eval_f1_macro": 0.7964085438550979,
+ "eval_loss": 0.2766495645046234,
+ "eval_pr_auc": 0.6413517959683596,
+ "eval_precision": 0.6842661034846885,
+ "eval_precision_macro": 0.8077537803332993,
+ "eval_pred_class_0": 16827,
+ "eval_pred_class_1": 2841,
+ "eval_predicted_binding_ratio": 0.1444478340451495,
+ "eval_recall": 0.6268945501451145,
+ "eval_recall_macro": 0.7863753851709456,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.95,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2782
+ },
+ {
+ "epoch": 108.0,
+ "eval_accuracy": 0.8957189343095383,
+ "eval_auc": 0.9190842761805244,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655583543240974,
+ "eval_f1_macro": 0.7970707027489733,
+ "eval_loss": 0.27643415331840515,
+ "eval_pr_auc": 0.6415079341486267,
+ "eval_precision": 0.6839523475823406,
+ "eval_precision_macro": 0.8078082185158045,
+ "eval_pred_class_0": 16814,
+ "eval_pred_class_1": 2854,
+ "eval_predicted_binding_ratio": 0.1451088061826317,
+ "eval_recall": 0.6294743631086747,
+ "eval_recall_macro": 0.7875143892563956,
+ "eval_runtime": 0.2441,
+ "eval_samples_per_second": 667.885,
+ "eval_steps_per_second": 4.097,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2808
+ },
+ {
+ "epoch": 109.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9192001707781057,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6554565327055658,
+ "eval_f1_macro": 0.7970445082288499,
+ "eval_loss": 0.2763550579547882,
+ "eval_pr_auc": 0.6419306315808602,
+ "eval_precision": 0.6848208011243851,
+ "eval_precision_macro": 0.8081695255176081,
+ "eval_pred_class_0": 16822,
+ "eval_pred_class_1": 2846,
+ "eval_predicted_binding_ratio": 0.14470205409802725,
+ "eval_recall": 0.6285069332473395,
+ "eval_recall_macro": 0.7871815767220581,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.285,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2834
+ },
+ {
+ "epoch": 110.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9193565525713485,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655919395465995,
+ "eval_f1_macro": 0.7972685860227431,
+ "eval_loss": 0.27626872062683105,
+ "eval_pr_auc": 0.6423732230660918,
+ "eval_precision": 0.684302733006307,
+ "eval_precision_macro": 0.8080131483516131,
+ "eval_pred_class_0": 16814,
+ "eval_pred_class_1": 2854,
+ "eval_predicted_binding_ratio": 0.1451088061826317,
+ "eval_recall": 0.6297968397291196,
+ "eval_recall_macro": 0.7877058080458841,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.337,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2860
+ },
+ {
+ "epoch": 111.0,
+ "eval_accuracy": 0.8959223103518406,
+ "eval_auc": 0.9193956188221624,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.656485987581809,
+ "eval_f1_macro": 0.7975781647169913,
+ "eval_loss": 0.2761881351470947,
+ "eval_pr_auc": 0.6423085789727141,
+ "eval_precision": 0.6843946815955213,
+ "eval_precision_macro": 0.8081402319339891,
+ "eval_pred_class_0": 16810,
+ "eval_pred_class_1": 2858,
+ "eval_predicted_binding_ratio": 0.1453121822249339,
+ "eval_recall": 0.6307642695904547,
+ "eval_recall_macro": 0.7881593424972857,
+ "eval_runtime": 0.2618,
+ "eval_samples_per_second": 622.648,
+ "eval_steps_per_second": 3.82,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2886
+ },
+ {
+ "epoch": 112.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9196281671522437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6564757265244414,
+ "eval_f1_macro": 0.7976085010119736,
+ "eval_loss": 0.2759994864463806,
+ "eval_pr_auc": 0.6431408853405497,
+ "eval_precision": 0.685133239831697,
+ "eval_precision_macro": 0.808462195558094,
+ "eval_pred_class_0": 16816,
+ "eval_pred_class_1": 2852,
+ "eval_predicted_binding_ratio": 0.14500711816148057,
+ "eval_recall": 0.6301193163495646,
+ "eval_recall_macro": 0.7879575877939047,
+ "eval_runtime": 0.2423,
+ "eval_samples_per_second": 672.612,
+ "eval_steps_per_second": 4.126,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2912
+ },
+ {
+ "epoch": 113.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9197686265771928,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6561291407432319,
+ "eval_f1_macro": 0.797440712214738,
+ "eval_loss": 0.27580633759498596,
+ "eval_pr_auc": 0.6436439478836922,
+ "eval_precision": 0.685523541813071,
+ "eval_precision_macro": 0.8085803418255701,
+ "eval_pred_class_0": 16822,
+ "eval_pred_class_1": 2846,
+ "eval_predicted_binding_ratio": 0.14470205409802725,
+ "eval_recall": 0.6291518864882296,
+ "eval_recall_macro": 0.7875644143010352,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.943,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2938
+ },
+ {
+ "epoch": 114.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9199272079152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6573731944910984,
+ "eval_f1_macro": 0.7981312081136818,
+ "eval_loss": 0.27558717131614685,
+ "eval_pr_auc": 0.6441453864761489,
+ "eval_precision": 0.6859446196985629,
+ "eval_precision_macro": 0.8089550633431857,
+ "eval_pred_class_0": 16815,
+ "eval_pred_class_1": 2853,
+ "eval_predicted_binding_ratio": 0.14505796217205613,
+ "eval_recall": 0.6310867462108997,
+ "eval_recall_macro": 0.7885016636831041,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.272,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2964
+ },
+ {
+ "epoch": 115.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9200543727465736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6571428571428571,
+ "eval_f1_macro": 0.7980197003020941,
+ "eval_loss": 0.2754935324192047,
+ "eval_pr_auc": 0.6445859889828064,
+ "eval_precision": 0.6862056862056862,
+ "eval_precision_macro": 0.8090342302245508,
+ "eval_pred_class_0": 16819,
+ "eval_pred_class_1": 2849,
+ "eval_predicted_binding_ratio": 0.1448545861297539,
+ "eval_recall": 0.6304417929700097,
+ "eval_recall_macro": 0.7882395480211912,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.903,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2990
+ },
+ {
+ "epoch": 115.38461538461539,
+ "grad_norm": 13551.1435546875,
+ "learning_rate": 9.488660254357756e-07,
+ "loss": 0.2594,
+ "step": 3000
+ },
+ {
+ "epoch": 116.0,
+ "eval_accuracy": 0.8964815944681717,
+ "eval_auc": 0.9201159794649721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6579301075268817,
+ "eval_f1_macro": 0.7984714041109127,
+ "eval_loss": 0.2753925323486328,
+ "eval_pr_auc": 0.6447630589609926,
+ "eval_precision": 0.6867765696246931,
+ "eval_precision_macro": 0.8094048157037065,
+ "eval_pred_class_0": 16817,
+ "eval_pred_class_1": 2851,
+ "eval_predicted_binding_ratio": 0.14495627415090503,
+ "eval_recall": 0.6314092228313447,
+ "eval_recall_macro": 0.7887534434311247,
+ "eval_runtime": 0.2556,
+ "eval_samples_per_second": 637.638,
+ "eval_steps_per_second": 3.912,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3016
+ },
+ {
+ "epoch": 117.0,
+ "eval_accuracy": 0.8964815944681717,
+ "eval_auc": 0.920259670079575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6579301075268817,
+ "eval_f1_macro": 0.7984714041109127,
+ "eval_loss": 0.27515658736228943,
+ "eval_pr_auc": 0.6451647424161069,
+ "eval_precision": 0.6867765696246931,
+ "eval_precision_macro": 0.8094048157037065,
+ "eval_pred_class_0": 16817,
+ "eval_pred_class_1": 2851,
+ "eval_predicted_binding_ratio": 0.14495627415090503,
+ "eval_recall": 0.6314092228313447,
+ "eval_recall_macro": 0.7887534434311247,
+ "eval_runtime": 0.2523,
+ "eval_samples_per_second": 645.985,
+ "eval_steps_per_second": 3.963,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3042
+ },
+ {
+ "epoch": 118.0,
+ "eval_accuracy": 0.8963799064470206,
+ "eval_auc": 0.920327525062304,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6583975863224941,
+ "eval_f1_macro": 0.7986623832121911,
+ "eval_loss": 0.2750197649002075,
+ "eval_pr_auc": 0.6454382852682906,
+ "eval_precision": 0.6855148342059337,
+ "eval_precision_macro": 0.8089241730394068,
+ "eval_pred_class_0": 16803,
+ "eval_pred_class_1": 2865,
+ "eval_predicted_binding_ratio": 0.14566809029896277,
+ "eval_recall": 0.6333440825540149,
+ "eval_recall_macro": 0.7894794294583318,
+ "eval_runtime": 0.2545,
+ "eval_samples_per_second": 640.495,
+ "eval_steps_per_second": 3.929,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3068
+ },
+ {
+ "epoch": 119.0,
+ "eval_accuracy": 0.8963799064470206,
+ "eval_auc": 0.9205032745284716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6585120643431636,
+ "eval_f1_macro": 0.7987177919414212,
+ "eval_loss": 0.27491119503974915,
+ "eval_pr_auc": 0.6460050422984182,
+ "eval_precision": 0.6853854202999651,
+ "eval_precision_macro": 0.8088851986923312,
+ "eval_pred_class_0": 16801,
+ "eval_pred_class_1": 2867,
+ "eval_predicted_binding_ratio": 0.1457697783201139,
+ "eval_recall": 0.6336665591744598,
+ "eval_recall_macro": 0.7896104872892882,
+ "eval_runtime": 0.2295,
+ "eval_samples_per_second": 710.293,
+ "eval_steps_per_second": 4.358,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3094
+ },
+ {
+ "epoch": 120.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9205394599595942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6587487453997992,
+ "eval_f1_macro": 0.7987969999557303,
+ "eval_loss": 0.2747833728790283,
+ "eval_pr_auc": 0.6459576422229408,
+ "eval_precision": 0.6843934654153633,
+ "eval_precision_macro": 0.8084881983738124,
+ "eval_pred_class_0": 16791,
+ "eval_pred_class_1": 2877,
+ "eval_predicted_binding_ratio": 0.14627821842586944,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7900743576545822,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 619.079,
+ "eval_steps_per_second": 3.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3120
+ },
+ {
+ "epoch": 121.0,
+ "eval_accuracy": 0.8966341264998983,
+ "eval_auc": 0.9206969317927203,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6591785414920369,
+ "eval_f1_macro": 0.7991286912009045,
+ "eval_loss": 0.2746541202068329,
+ "eval_pr_auc": 0.6466379382676535,
+ "eval_precision": 0.6864525139664804,
+ "eval_precision_macro": 0.8094545359644352,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.6339890357949048,
+ "eval_recall_macro": 0.7898924475165747,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.354,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3146
+ },
+ {
+ "epoch": 122.0,
+ "eval_accuracy": 0.8966341264998983,
+ "eval_auc": 0.9208301697034432,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6592927769398358,
+ "eval_f1_macro": 0.7991839832435101,
+ "eval_loss": 0.2745382785797119,
+ "eval_pr_auc": 0.6470920919458031,
+ "eval_precision": 0.6863224005582693,
+ "eval_precision_macro": 0.809415217658018,
+ "eval_pred_class_0": 16802,
+ "eval_pred_class_1": 2866,
+ "eval_predicted_binding_ratio": 0.14571893430953833,
+ "eval_recall": 0.6343115124153499,
+ "eval_recall_macro": 0.7900235053475313,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.372,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3172
+ },
+ {
+ "epoch": 123.0,
+ "eval_accuracy": 0.8967358145210494,
+ "eval_auc": 0.9209352222971863,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6597420003350645,
+ "eval_f1_macro": 0.7994367387715422,
+ "eval_loss": 0.2744734585285187,
+ "eval_pr_auc": 0.6474021950727136,
+ "eval_precision": 0.6865411436541143,
+ "eval_precision_macro": 0.809580095636581,
+ "eval_pred_class_0": 16800,
+ "eval_pred_class_1": 2868,
+ "eval_predicted_binding_ratio": 0.14582062233068943,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7903459819679763,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 641.926,
+ "eval_steps_per_second": 3.938,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3198
+ },
+ {
+ "epoch": 124.0,
+ "eval_accuracy": 0.896786658531625,
+ "eval_auc": 0.9210344159265571,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6599664991624791,
+ "eval_f1_macro": 0.7995630613656908,
+ "eval_loss": 0.2742863893508911,
+ "eval_pr_auc": 0.6479185906925482,
+ "eval_precision": 0.6866504008365284,
+ "eval_precision_macro": 0.8096624823993346,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.6352789422766849,
+ "eval_recall_macro": 0.7905072202781989,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.863,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3224
+ },
+ {
+ "epoch": 125.0,
+ "eval_accuracy": 0.896888346552776,
+ "eval_auc": 0.9211382130279347,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6603015075376885,
+ "eval_f1_macro": 0.7997605361820792,
+ "eval_loss": 0.27425193786621094,
+ "eval_pr_auc": 0.6481518613470144,
+ "eval_precision": 0.6869989543394911,
+ "eval_precision_macro": 0.8098665228272252,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.63560141889713,
+ "eval_recall_macro": 0.7906986390676873,
+ "eval_runtime": 0.2655,
+ "eval_samples_per_second": 613.835,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3250
+ },
+ {
+ "epoch": 126.0,
+ "eval_accuracy": 0.896888346552776,
+ "eval_auc": 0.9212811737051158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6598456893659845,
+ "eval_f1_macro": 0.7995399118610351,
+ "eval_loss": 0.2741680145263672,
+ "eval_pr_auc": 0.6487724280436702,
+ "eval_precision": 0.6875218455085634,
+ "eval_precision_macro": 0.8100249793973471,
+ "eval_pred_class_0": 16807,
+ "eval_pred_class_1": 2861,
+ "eval_predicted_binding_ratio": 0.14546471425666058,
+ "eval_recall": 0.6343115124153499,
+ "eval_recall_macro": 0.7901744077438613,
+ "eval_runtime": 0.2775,
+ "eval_samples_per_second": 587.468,
+ "eval_steps_per_second": 3.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3276
+ },
+ {
+ "epoch": 127.0,
+ "eval_accuracy": 0.8966849705104739,
+ "eval_auc": 0.9212962201485035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6608811748998665,
+ "eval_f1_macro": 0.7999703379297798,
+ "eval_loss": 0.2739817500114441,
+ "eval_pr_auc": 0.648761125236648,
+ "eval_precision": 0.6848841231407817,
+ "eval_precision_macro": 0.809033228048307,
+ "eval_pred_class_0": 16777,
+ "eval_pred_class_1": 2891,
+ "eval_predicted_binding_ratio": 0.1469900345739272,
+ "eval_recall": 0.6385037084811351,
+ "eval_recall_macro": 0.7917574376292318,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 634.003,
+ "eval_steps_per_second": 3.89,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3302
+ },
+ {
+ "epoch": 128.0,
+ "eval_accuracy": 0.8971425666056538,
+ "eval_auc": 0.9213870048987755,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6608549874266555,
+ "eval_f1_macro": 0.8001167448595325,
+ "eval_loss": 0.2739529609680176,
+ "eval_pr_auc": 0.6489216955933252,
+ "eval_precision": 0.6881983240223464,
+ "eval_precision_macro": 0.8104762150937725,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.63560141889713,
+ "eval_recall_macro": 0.7908495414640173,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.828,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3328
+ },
+ {
+ "epoch": 129.0,
+ "eval_accuracy": 0.8970408785845028,
+ "eval_auc": 0.9215601460552225,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6604058359885964,
+ "eval_f1_macro": 0.7998640212813866,
+ "eval_loss": 0.27383002638816833,
+ "eval_pr_auc": 0.6495277773578755,
+ "eval_precision": 0.6879804332634522,
+ "eval_precision_macro": 0.8103117684584547,
+ "eval_pred_class_0": 16806,
+ "eval_pred_class_1": 2862,
+ "eval_predicted_binding_ratio": 0.1455155582672361,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7905270648435724,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.602,
+ "eval_steps_per_second": 3.752,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3354
+ },
+ {
+ "epoch": 130.0,
+ "eval_accuracy": 0.8969900345739272,
+ "eval_auc": 0.9216559138449605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6610906657745065,
+ "eval_f1_macro": 0.8001778048579948,
+ "eval_loss": 0.27363157272338867,
+ "eval_pr_auc": 0.6497825902519792,
+ "eval_precision": 0.6868265554396942,
+ "eval_precision_macro": 0.8099131883862756,
+ "eval_pred_class_0": 16791,
+ "eval_pred_class_1": 2877,
+ "eval_predicted_binding_ratio": 0.14627821842586944,
+ "eval_recall": 0.6372138019993551,
+ "eval_recall_macro": 0.7914142891810019,
+ "eval_runtime": 0.2147,
+ "eval_samples_per_second": 759.086,
+ "eval_steps_per_second": 4.657,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3380
+ },
+ {
+ "epoch": 131.0,
+ "eval_accuracy": 0.8970917225950783,
+ "eval_auc": 0.9217394576160085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6619906479625919,
+ "eval_f1_macro": 0.800648676506185,
+ "eval_loss": 0.27347350120544434,
+ "eval_pr_auc": 0.6501280761818352,
+ "eval_precision": 0.686525805334257,
+ "eval_precision_macro": 0.8099216238398834,
+ "eval_pred_class_0": 16781,
+ "eval_pred_class_1": 2887,
+ "eval_predicted_binding_ratio": 0.14678665853162498,
+ "eval_recall": 0.6391486617220251,
+ "eval_recall_macro": 0.7922609971252728,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.642,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3406
+ },
+ {
+ "epoch": 132.0,
+ "eval_accuracy": 0.8970917225950783,
+ "eval_auc": 0.9218276924515536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6609715242881072,
+ "eval_f1_macro": 0.8001554858148563,
+ "eval_loss": 0.2734222412109375,
+ "eval_pr_auc": 0.6505561229387223,
+ "eval_precision": 0.6876960613454165,
+ "eval_precision_macro": 0.8102746036830064,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.63624637213802,
+ "eval_recall_macro": 0.7910814766466644,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.527,
+ "eval_steps_per_second": 3.893,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3432
+ },
+ {
+ "epoch": 133.0,
+ "eval_accuracy": 0.8976001627008339,
+ "eval_auc": 0.9219583415175538,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6620805369127517,
+ "eval_f1_macro": 0.8008688878235859,
+ "eval_loss": 0.273334801197052,
+ "eval_pr_auc": 0.6510448354697362,
+ "eval_precision": 0.6901014340678558,
+ "eval_precision_macro": 0.8114972635268781,
+ "eval_pred_class_0": 16809,
+ "eval_pred_class_1": 2859,
+ "eval_predicted_binding_ratio": 0.14536302623550945,
+ "eval_recall": 0.63624637213802,
+ "eval_recall_macro": 0.7913832814393245,
+ "eval_runtime": 0.2542,
+ "eval_samples_per_second": 641.276,
+ "eval_steps_per_second": 3.934,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3458
+ },
+ {
+ "epoch": 134.0,
+ "eval_accuracy": 0.8976001627008339,
+ "eval_auc": 0.92201595791138,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6632107023411371,
+ "eval_f1_macro": 0.8014158800109571,
+ "eval_loss": 0.2731546461582184,
+ "eval_pr_auc": 0.6511084632800272,
+ "eval_precision": 0.6887808266759291,
+ "eval_precision_macro": 0.8110948031169865,
+ "eval_pred_class_0": 16789,
+ "eval_pred_class_1": 2879,
+ "eval_predicted_binding_ratio": 0.14637990644702054,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7926938597488895,
+ "eval_runtime": 0.2551,
+ "eval_samples_per_second": 638.927,
+ "eval_steps_per_second": 3.92,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3484
+ },
+ {
+ "epoch": 134.6153846153846,
+ "grad_norm": 16295.5498046875,
+ "learning_rate": 9.153428025759045e-07,
+ "loss": 0.2515,
+ "step": 3500
+ },
+ {
+ "epoch": 135.0,
+ "eval_accuracy": 0.8977526947325605,
+ "eval_auc": 0.9221653809678686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.663543583737661,
+ "eval_f1_macro": 0.8016300010477627,
+ "eval_loss": 0.2730526030063629,
+ "eval_pr_auc": 0.6517474669432921,
+ "eval_precision": 0.6894993045897079,
+ "eval_precision_macro": 0.8114599905511665,
+ "eval_pred_class_0": 16792,
+ "eval_pred_class_1": 2876,
+ "eval_predicted_binding_ratio": 0.14622737441529388,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7927844011866875,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.318,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3510
+ },
+ {
+ "epoch": 136.0,
+ "eval_accuracy": 0.8976510067114094,
+ "eval_auc": 0.9222144814251072,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6634342083263668,
+ "eval_f1_macro": 0.8015417181640828,
+ "eval_loss": 0.27299538254737854,
+ "eval_pr_auc": 0.6516584418617962,
+ "eval_precision": 0.6888888888888889,
+ "eval_precision_macro": 0.8111766341037249,
+ "eval_pred_class_0": 16788,
+ "eval_pred_class_1": 2880,
+ "eval_predicted_binding_ratio": 0.1464307504575961,
+ "eval_recall": 0.6397936149629152,
+ "eval_recall_macro": 0.792855098059112,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.888,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3536
+ },
+ {
+ "epoch": 137.0,
+ "eval_accuracy": 0.8978543827537117,
+ "eval_auc": 0.922406970789481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6637656903765691,
+ "eval_f1_macro": 0.8017728364954996,
+ "eval_loss": 0.27294018864631653,
+ "eval_pr_auc": 0.6525345183514315,
+ "eval_precision": 0.6899791231732777,
+ "eval_precision_macro": 0.8117038643138033,
+ "eval_pred_class_0": 16794,
+ "eval_pred_class_1": 2874,
+ "eval_predicted_binding_ratio": 0.14612568639414278,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7928447621452195,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.531,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3562
+ },
+ {
+ "epoch": 138.0,
+ "eval_accuracy": 0.8979560707748627,
+ "eval_auc": 0.9224206449505158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6643251379829402,
+ "eval_f1_macro": 0.8020789283763069,
+ "eval_loss": 0.272890567779541,
+ "eval_pr_auc": 0.652455646685698,
+ "eval_precision": 0.6900625434329395,
+ "eval_precision_macro": 0.8118269834496443,
+ "eval_pred_class_0": 16790,
+ "eval_pred_class_1": 2878,
+ "eval_predicted_binding_ratio": 0.14632906243644497,
+ "eval_recall": 0.6404385682038052,
+ "eval_recall_macro": 0.793298296596621,
+ "eval_runtime": 0.2465,
+ "eval_samples_per_second": 661.298,
+ "eval_steps_per_second": 4.057,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3588
+ },
+ {
+ "epoch": 139.0,
+ "eval_accuracy": 0.8980069147854383,
+ "eval_auc": 0.9225580970332872,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6647727272727273,
+ "eval_f1_macro": 0.802313204605421,
+ "eval_loss": 0.2726689577102661,
+ "eval_pr_auc": 0.6528952820360587,
+ "eval_precision": 0.6899063475546305,
+ "eval_precision_macro": 0.8118283599554506,
+ "eval_pred_class_0": 16785,
+ "eval_pred_class_1": 2883,
+ "eval_predicted_binding_ratio": 0.14658328248932276,
+ "eval_recall": 0.6414059980651403,
+ "eval_recall_macro": 0.7937216505687565,
+ "eval_runtime": 0.1981,
+ "eval_samples_per_second": 822.798,
+ "eval_steps_per_second": 5.048,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3614
+ },
+ {
+ "epoch": 140.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9226611836622408,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6642078792958928,
+ "eval_f1_macro": 0.8020928521767887,
+ "eval_loss": 0.2726409435272217,
+ "eval_pr_auc": 0.6533512747607447,
+ "eval_precision": 0.6916899441340782,
+ "eval_precision_macro": 0.8125195733524473,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.6388261851015802,
+ "eval_recall_macro": 0.7927637293589026,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.759,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3640
+ },
+ {
+ "epoch": 141.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.9227806014244446,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.666110183639399,
+ "eval_f1_macro": 0.8030664874893451,
+ "eval_loss": 0.2725253105163574,
+ "eval_pr_auc": 0.6539162157851042,
+ "eval_precision": 0.6905503634475597,
+ "eval_precision_macro": 0.8123173177271173,
+ "eval_pred_class_0": 16779,
+ "eval_pred_class_1": 2889,
+ "eval_predicted_binding_ratio": 0.14688834655277608,
+ "eval_recall": 0.6433408577878104,
+ "eval_recall_macro": 0.7946890804300916,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.673,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3666
+ },
+ {
+ "epoch": 142.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9228251470721713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6665556850341269,
+ "eval_f1_macro": 0.8032289361592369,
+ "eval_loss": 0.27245020866394043,
+ "eval_pr_auc": 0.6537875546315605,
+ "eval_precision": 0.6889194769442533,
+ "eval_precision_macro": 0.8116772542816959,
+ "eval_pred_class_0": 16762,
+ "eval_pred_class_1": 2906,
+ "eval_predicted_binding_ratio": 0.1477526947325605,
+ "eval_recall": 0.6455981941309256,
+ "eval_recall_macro": 0.795515943808989,
+ "eval_runtime": 0.2562,
+ "eval_samples_per_second": 636.157,
+ "eval_steps_per_second": 3.903,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3692
+ },
+ {
+ "epoch": 143.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.9229446329618678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6663324979114453,
+ "eval_f1_macro": 0.8032271166958205,
+ "eval_loss": 0.2723686695098877,
+ "eval_pr_auc": 0.6543603067705216,
+ "eval_precision": 0.6914008321775312,
+ "eval_precision_macro": 0.8127225800544472,
+ "eval_pred_class_0": 16784,
+ "eval_pred_class_1": 2884,
+ "eval_predicted_binding_ratio": 0.14663412649989832,
+ "eval_recall": 0.6430183811673653,
+ "eval_recall_macro": 0.7946485640369331,
+ "eval_runtime": 0.2531,
+ "eval_samples_per_second": 644.012,
+ "eval_steps_per_second": 3.951,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3718
+ },
+ {
+ "epoch": 144.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9229539761608667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6658882402001668,
+ "eval_f1_macro": 0.8029060288610683,
+ "eval_loss": 0.2724270820617676,
+ "eval_pr_auc": 0.6540924505284794,
+ "eval_precision": 0.6897028334485141,
+ "eval_precision_macro": 0.8119135366717949,
+ "eval_pred_class_0": 16774,
+ "eval_pred_class_1": 2894,
+ "eval_predicted_binding_ratio": 0.14714256660565386,
+ "eval_recall": 0.6436633344082554,
+ "eval_recall_macro": 0.7947295968232501,
+ "eval_runtime": 0.2578,
+ "eval_samples_per_second": 632.29,
+ "eval_steps_per_second": 3.879,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3744
+ },
+ {
+ "epoch": 145.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.923075982767793,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671105193075899,
+ "eval_f1_macro": 0.8035504588856721,
+ "eval_loss": 0.27218085527420044,
+ "eval_pr_auc": 0.6548951387334131,
+ "eval_precision": 0.6893704850361198,
+ "eval_precision_macro": 0.8119604647601695,
+ "eval_pred_class_0": 16761,
+ "eval_pred_class_1": 2907,
+ "eval_predicted_binding_ratio": 0.14780353874313606,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7958686009087,
+ "eval_runtime": 0.2047,
+ "eval_samples_per_second": 796.353,
+ "eval_steps_per_second": 4.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3770
+ },
+ {
+ "epoch": 146.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.9231917800403848,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671111851975329,
+ "eval_f1_macro": 0.8036038872863508,
+ "eval_loss": 0.27211907505989075,
+ "eval_pr_auc": 0.6553363499797747,
+ "eval_precision": 0.6904761904761905,
+ "eval_precision_macro": 0.8124414345344577,
+ "eval_pred_class_0": 16770,
+ "eval_pred_class_1": 2898,
+ "eval_predicted_binding_ratio": 0.14734594264795606,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7955659688536286,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 629.971,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3796
+ },
+ {
+ "epoch": 147.0,
+ "eval_accuracy": 0.8985661989017694,
+ "eval_auc": 0.9232496689441817,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6676661669165417,
+ "eval_f1_macro": 0.8039077842052783,
+ "eval_loss": 0.2721000015735626,
+ "eval_pr_auc": 0.655340056976625,
+ "eval_precision": 0.6905582356995176,
+ "eval_precision_macro": 0.812564099359958,
+ "eval_pred_class_0": 16766,
+ "eval_pred_class_1": 2902,
+ "eval_predicted_binding_ratio": 0.14754931869025828,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7960195033050301,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.767,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3822
+ },
+ {
+ "epoch": 148.0,
+ "eval_accuracy": 0.8985661989017694,
+ "eval_auc": 0.9233701767462686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6673336668334167,
+ "eval_f1_macro": 0.8037469198020228,
+ "eval_loss": 0.2719952464103699,
+ "eval_pr_auc": 0.6558934902075464,
+ "eval_precision": 0.6909530386740331,
+ "eval_precision_macro": 0.8126837695158862,
+ "eval_pred_class_0": 16772,
+ "eval_pred_class_1": 2896,
+ "eval_predicted_binding_ratio": 0.14724425462680496,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7956263298121606,
+ "eval_runtime": 0.2562,
+ "eval_samples_per_second": 636.275,
+ "eval_steps_per_second": 3.904,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3848
+ },
+ {
+ "epoch": 149.0,
+ "eval_accuracy": 0.8983628228594671,
+ "eval_auc": 0.9234171749837325,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6676641729010806,
+ "eval_f1_macro": 0.8038359878940744,
+ "eval_loss": 0.27190613746643066,
+ "eval_pr_auc": 0.6558672612955618,
+ "eval_precision": 0.6890871654083733,
+ "eval_precision_macro": 0.8119245066626444,
+ "eval_pred_class_0": 16754,
+ "eval_pred_class_1": 2914,
+ "eval_predicted_binding_ratio": 0.14815944681716495,
+ "eval_recall": 0.6475330538535956,
+ "eval_recall_macro": 0.796423012711792,
+ "eval_runtime": 0.1964,
+ "eval_samples_per_second": 830.09,
+ "eval_steps_per_second": 5.093,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3874
+ },
+ {
+ "epoch": 150.0,
+ "eval_accuracy": 0.8986678869229204,
+ "eval_auc": 0.9235316972989609,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6682204095222241,
+ "eval_f1_macro": 0.8042113179058208,
+ "eval_loss": 0.2718164622783661,
+ "eval_pr_auc": 0.6562427373314145,
+ "eval_precision": 0.6906400550584997,
+ "eval_precision_macro": 0.8126866902186664,
+ "eval_pred_class_0": 16762,
+ "eval_pred_class_1": 2906,
+ "eval_predicted_binding_ratio": 0.1477526947325605,
+ "eval_recall": 0.6472105772331506,
+ "eval_recall_macro": 0.7964730377564316,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.207,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3900
+ },
+ {
+ "epoch": 151.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.923607970893288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.66744379683597,
+ "eval_f1_macro": 0.8037648014211952,
+ "eval_loss": 0.27192452549934387,
+ "eval_pr_auc": 0.6563118529429329,
+ "eval_precision": 0.6900826446280992,
+ "eval_precision_macro": 0.8123224008156005,
+ "eval_pred_class_0": 16764,
+ "eval_pred_class_1": 2904,
+ "eval_predicted_binding_ratio": 0.1476510067114094,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7959591423464981,
+ "eval_runtime": 0.1771,
+ "eval_samples_per_second": 920.181,
+ "eval_steps_per_second": 5.645,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3926
+ },
+ {
+ "epoch": 152.0,
+ "eval_accuracy": 0.8986170429123449,
+ "eval_auc": 0.9236869111923289,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6687707641196013,
+ "eval_f1_macro": 0.8044598207679289,
+ "eval_loss": 0.2716231048107147,
+ "eval_pr_auc": 0.6568319718650024,
+ "eval_precision": 0.6896197327852004,
+ "eval_precision_macro": 0.812330315374629,
+ "eval_pred_class_0": 16749,
+ "eval_pred_class_1": 2919,
+ "eval_predicted_binding_ratio": 0.1484136668700427,
+ "eval_recall": 0.6491454369558207,
+ "eval_recall_macro": 0.7972292042629046,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.369,
+ "eval_steps_per_second": 4.15,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3952
+ },
+ {
+ "epoch": 153.0,
+ "eval_accuracy": 0.8986678869229204,
+ "eval_auc": 0.9238590985638783,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671120761650242,
+ "eval_f1_macro": 0.8036750821318089,
+ "eval_loss": 0.2716236114501953,
+ "eval_pr_auc": 0.657752547087354,
+ "eval_precision": 0.691961191961192,
+ "eval_precision_macro": 0.8130882112827054,
+ "eval_pred_class_0": 16782,
+ "eval_pred_class_1": 2886,
+ "eval_predicted_binding_ratio": 0.14673581452104942,
+ "eval_recall": 0.6439858110287005,
+ "eval_recall_macro": 0.7951624594468667,
+ "eval_runtime": 0.2469,
+ "eval_samples_per_second": 660.315,
+ "eval_steps_per_second": 4.051,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3978
+ },
+ {
+ "epoch": 153.84615384615384,
+ "grad_norm": 13863.017578125,
+ "learning_rate": 8.743443888522679e-07,
+ "loss": 0.244,
+ "step": 4000
+ },
+ {
+ "epoch": 154.0,
+ "eval_accuracy": 0.8986170429123449,
+ "eval_auc": 0.9239073328287097,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6674449633088726,
+ "eval_f1_macro": 0.8038184624582756,
+ "eval_loss": 0.27161940932273865,
+ "eval_pr_auc": 0.6577434889769634,
+ "eval_precision": 0.6911917098445596,
+ "eval_precision_macro": 0.8128050601926549,
+ "eval_pred_class_0": 16773,
+ "eval_pred_class_1": 2895,
+ "eval_predicted_binding_ratio": 0.1471934106162294,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7956565102914266,
+ "eval_runtime": 0.2311,
+ "eval_samples_per_second": 705.281,
+ "eval_steps_per_second": 4.327,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4004
+ },
+ {
+ "epoch": 155.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.9240325316952941,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8033357331413487,
+ "eval_loss": 0.2715211510658264,
+ "eval_pr_auc": 0.6581299979478261,
+ "eval_precision": 0.689893066574681,
+ "eval_precision_macro": 0.812118099868532,
+ "eval_pred_class_0": 16769,
+ "eval_pred_class_1": 2899,
+ "eval_predicted_binding_ratio": 0.14739678665853162,
+ "eval_recall": 0.6449532408900355,
+ "eval_recall_macro": 0.7953443695848741,
+ "eval_runtime": 0.2265,
+ "eval_samples_per_second": 719.735,
+ "eval_steps_per_second": 4.416,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4030
+ },
+ {
+ "epoch": 156.0,
+ "eval_accuracy": 0.8987187309334961,
+ "eval_auc": 0.9240861966945435,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6688829787234043,
+ "eval_f1_macro": 0.8045495325789891,
+ "eval_loss": 0.27123013138771057,
+ "eval_pr_auc": 0.6585643355556502,
+ "eval_precision": 0.6902229845626072,
+ "eval_precision_macro": 0.8126098507842583,
+ "eval_pred_class_0": 16753,
+ "eval_pred_class_1": 2915,
+ "eval_predicted_binding_ratio": 0.14821029082774048,
+ "eval_recall": 0.6488229603353757,
+ "eval_recall_macro": 0.7971585073904801,
+ "eval_runtime": 0.2445,
+ "eval_samples_per_second": 666.601,
+ "eval_steps_per_second": 4.09,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4056
+ },
+ {
+ "epoch": 157.0,
+ "eval_accuracy": 0.8987695749440716,
+ "eval_auc": 0.9242226073999265,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.668111351891982,
+ "eval_f1_macro": 0.8041939607346642,
+ "eval_loss": 0.2712385952472687,
+ "eval_pr_auc": 0.6591839305732792,
+ "eval_precision": 0.6915113871635611,
+ "eval_precision_macro": 0.8130484783164258,
+ "eval_pred_class_0": 16770,
+ "eval_pred_class_1": 2898,
+ "eval_predicted_binding_ratio": 0.14734594264795606,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7961402252220942,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 634.046,
+ "eval_steps_per_second": 3.89,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4082
+ },
+ {
+ "epoch": 158.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9241500613527003,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6699883740242485,
+ "eval_f1_macro": 0.8051727852411502,
+ "eval_loss": 0.27118799090385437,
+ "eval_pr_auc": 0.6587023192472763,
+ "eval_precision": 0.6907534246575342,
+ "eval_precision_macro": 0.8130146392454138,
+ "eval_pred_class_0": 16748,
+ "eval_pred_class_1": 2920,
+ "eval_predicted_binding_ratio": 0.14846451088061827,
+ "eval_recall": 0.6504353434376008,
+ "eval_recall_macro": 0.7979646989415927,
+ "eval_runtime": 0.2165,
+ "eval_samples_per_second": 752.927,
+ "eval_steps_per_second": 4.619,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4108
+ },
+ {
+ "epoch": 159.0,
+ "eval_accuracy": 0.8990237949969494,
+ "eval_auc": 0.9243365652302152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6693306693306693,
+ "eval_f1_macro": 0.8048723553674049,
+ "eval_loss": 0.2712218463420868,
+ "eval_pr_auc": 0.6594296109425748,
+ "eval_precision": 0.6919104991394148,
+ "eval_precision_macro": 0.8134133417966358,
+ "eval_pred_class_0": 16763,
+ "eval_pred_class_1": 2905,
+ "eval_predicted_binding_ratio": 0.14770185072198494,
+ "eval_recall": 0.6481780070944857,
+ "eval_recall_macro": 0.7970774746041632,
+ "eval_runtime": 0.256,
+ "eval_samples_per_second": 636.627,
+ "eval_steps_per_second": 3.906,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4134
+ },
+ {
+ "epoch": 160.0,
+ "eval_accuracy": 0.8990746390075249,
+ "eval_auc": 0.9243664342695147,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6709762970329852,
+ "eval_f1_macro": 0.805686028587357,
+ "eval_loss": 0.2712063789367676,
+ "eval_pr_auc": 0.6596905428752633,
+ "eval_precision": 0.6903137789904502,
+ "eval_precision_macro": 0.8129807422676916,
+ "eval_pred_class_0": 16736,
+ "eval_pred_class_1": 2932,
+ "eval_predicted_binding_ratio": 0.1490746390075249,
+ "eval_recall": 0.6526926797807159,
+ "eval_recall_macro": 0.7989424647168202,
+ "eval_runtime": 0.1787,
+ "eval_samples_per_second": 912.327,
+ "eval_steps_per_second": 5.597,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4160
+ },
+ {
+ "epoch": 161.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9244822704721024,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6700979578283247,
+ "eval_f1_macro": 0.8052257867669526,
+ "eval_loss": 0.2710643708705902,
+ "eval_pr_auc": 0.6603246471492675,
+ "eval_precision": 0.6906228610540726,
+ "eval_precision_macro": 0.8129753502690642,
+ "eval_pred_class_0": 16746,
+ "eval_pred_class_1": 2922,
+ "eval_predicted_binding_ratio": 0.14856619890176936,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7980957567725492,
+ "eval_runtime": 0.2165,
+ "eval_samples_per_second": 752.853,
+ "eval_steps_per_second": 4.619,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4186
+ },
+ {
+ "epoch": 162.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9245231275027241,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6700979578283247,
+ "eval_f1_macro": 0.8052257867669526,
+ "eval_loss": 0.2710554301738739,
+ "eval_pr_auc": 0.6603878428843051,
+ "eval_precision": 0.6906228610540726,
+ "eval_precision_macro": 0.8129753502690642,
+ "eval_pred_class_0": 16746,
+ "eval_pred_class_1": 2922,
+ "eval_predicted_binding_ratio": 0.14856619890176936,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7980957567725492,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.523,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4212
+ },
+ {
+ "epoch": 163.0,
+ "eval_accuracy": 0.8993288590604027,
+ "eval_auc": 0.9246038585815736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6719681908548708,
+ "eval_f1_macro": 0.8062543656977057,
+ "eval_loss": 0.27094030380249023,
+ "eval_pr_auc": 0.6606722300563197,
+ "eval_precision": 0.6909710391822828,
+ "eval_precision_macro": 0.8134231279100321,
+ "eval_pred_class_0": 16733,
+ "eval_pred_class_1": 2935,
+ "eval_predicted_binding_ratio": 0.14922717103925157,
+ "eval_recall": 0.653982586262496,
+ "eval_recall_macro": 0.7996175984369762,
+ "eval_runtime": 0.2218,
+ "eval_samples_per_second": 735.026,
+ "eval_steps_per_second": 4.509,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4238
+ },
+ {
+ "epoch": 164.0,
+ "eval_accuracy": 0.8992271710392515,
+ "eval_auc": 0.9247262642209572,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6713101160862355,
+ "eval_f1_macro": 0.8059006594362601,
+ "eval_loss": 0.2709755003452301,
+ "eval_pr_auc": 0.6610856256039915,
+ "eval_precision": 0.691020826220553,
+ "eval_precision_macro": 0.8133400325618567,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6526926797807159,
+ "eval_recall_macro": 0.7990330061546183,
+ "eval_runtime": 0.2493,
+ "eval_samples_per_second": 653.878,
+ "eval_steps_per_second": 4.012,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4264
+ },
+ {
+ "epoch": 165.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.9247699826062725,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6738446248136492,
+ "eval_f1_macro": 0.8073568599908361,
+ "eval_loss": 0.2707850933074951,
+ "eval_pr_auc": 0.6613417671448518,
+ "eval_precision": 0.6927792915531336,
+ "eval_precision_macro": 0.8145046350187375,
+ "eval_pred_class_0": 16732,
+ "eval_pred_class_1": 2936,
+ "eval_predicted_binding_ratio": 0.14927801504982713,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8007359306946413,
+ "eval_runtime": 0.1724,
+ "eval_samples_per_second": 945.745,
+ "eval_steps_per_second": 5.802,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4290
+ },
+ {
+ "epoch": 166.0,
+ "eval_accuracy": 0.9001423632296115,
+ "eval_auc": 0.9248060123174118,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6753719008264463,
+ "eval_f1_macro": 0.8081840577256068,
+ "eval_loss": 0.2708885669708252,
+ "eval_pr_auc": 0.6612557604235284,
+ "eval_precision": 0.6927772126144456,
+ "eval_precision_macro": 0.8147479579430862,
+ "eval_pred_class_0": 16719,
+ "eval_pred_class_1": 2949,
+ "eval_predicted_binding_ratio": 0.14993898718730933,
+ "eval_recall": 0.6588197355691713,
+ "eval_recall_macro": 0.8020663535695799,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.671,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4316
+ },
+ {
+ "epoch": 167.0,
+ "eval_accuracy": 0.8999389871873094,
+ "eval_auc": 0.9248204358808662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6741721854304635,
+ "eval_f1_macro": 0.8075329932438238,
+ "eval_loss": 0.270906925201416,
+ "eval_pr_auc": 0.6612342465257918,
+ "eval_precision": 0.692752636951344,
+ "eval_precision_macro": 0.8145453662370445,
+ "eval_pred_class_0": 16729,
+ "eval_pred_class_1": 2939,
+ "eval_predicted_binding_ratio": 0.1494305470815538,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010282268358203,
+ "eval_runtime": 0.2263,
+ "eval_samples_per_second": 720.252,
+ "eval_steps_per_second": 4.419,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4342
+ },
+ {
+ "epoch": 168.0,
+ "eval_accuracy": 0.900091519219036,
+ "eval_auc": 0.9249249239896699,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6750454770960806,
+ "eval_f1_macro": 0.8080084845902765,
+ "eval_loss": 0.2706829011440277,
+ "eval_pr_auc": 0.6618605064537387,
+ "eval_precision": 0.6928038017651053,
+ "eval_precision_macro": 0.8147071275300828,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8017740574284009,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.89,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4368
+ },
+ {
+ "epoch": 169.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9250021027063997,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6742838218248054,
+ "eval_f1_macro": 0.8076047153692607,
+ "eval_loss": 0.2705872058868408,
+ "eval_pr_auc": 0.6621173041985378,
+ "eval_precision": 0.6929884275017019,
+ "eval_precision_macro": 0.8146651641393745,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010584073150864,
+ "eval_runtime": 0.2622,
+ "eval_samples_per_second": 621.716,
+ "eval_steps_per_second": 3.814,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4394
+ },
+ {
+ "epoch": 170.0,
+ "eval_accuracy": 0.900091519219036,
+ "eval_auc": 0.9250495097088196,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6750454770960806,
+ "eval_f1_macro": 0.8080084845902765,
+ "eval_loss": 0.2706546485424042,
+ "eval_pr_auc": 0.6620136434657915,
+ "eval_precision": 0.6928038017651053,
+ "eval_precision_macro": 0.8147071275300828,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8017740574284009,
+ "eval_runtime": 0.2696,
+ "eval_samples_per_second": 604.539,
+ "eval_steps_per_second": 3.709,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4420
+ },
+ {
+ "epoch": 171.0,
+ "eval_accuracy": 0.8999389871873094,
+ "eval_auc": 0.9250856659424456,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6748182419035029,
+ "eval_f1_macro": 0.8078453665953039,
+ "eval_loss": 0.27060601115226746,
+ "eval_pr_auc": 0.6621340116082275,
+ "eval_precision": 0.6919688241274145,
+ "eval_precision_macro": 0.814310068581025,
+ "eval_pred_class_0": 16717,
+ "eval_pred_class_1": 2951,
+ "eval_predicted_binding_ratio": 0.15004067520846046,
+ "eval_recall": 0.6584972589487262,
+ "eval_recall_macro": 0.8018145738215594,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.143,
+ "eval_steps_per_second": 3.958,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4446
+ },
+ {
+ "epoch": 172.0,
+ "eval_accuracy": 0.9000406752084604,
+ "eval_auc": 0.9252334636716082,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6741796486576069,
+ "eval_f1_macro": 0.8075720776469225,
+ "eval_loss": 0.27041611075401306,
+ "eval_pr_auc": 0.6630610344326174,
+ "eval_precision": 0.6934878963518581,
+ "eval_precision_macro": 0.8148646532849819,
+ "eval_pred_class_0": 16735,
+ "eval_pred_class_1": 2933,
+ "eval_predicted_binding_ratio": 0.14912548301810047,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8008264721324394,
+ "eval_runtime": 0.2332,
+ "eval_samples_per_second": 698.832,
+ "eval_steps_per_second": 4.287,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4472
+ },
+ {
+ "epoch": 173.0,
+ "eval_accuracy": 0.8998372991661582,
+ "eval_auc": 0.9253198298673536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6740569159497022,
+ "eval_f1_macro": 0.8074417704823604,
+ "eval_loss": 0.2705075442790985,
+ "eval_pr_auc": 0.662984991174244,
+ "eval_precision": 0.6921508664627931,
+ "eval_precision_macro": 0.8142667635751932,
+ "eval_pred_class_0": 16725,
+ "eval_pred_class_1": 2943,
+ "eval_predicted_binding_ratio": 0.149633923123856,
+ "eval_recall": 0.6568848758465011,
+ "eval_recall_macro": 0.8010989237082449,
+ "eval_runtime": 0.2134,
+ "eval_samples_per_second": 763.793,
+ "eval_steps_per_second": 4.686,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4498
+ },
+ {
+ "epoch": 173.07692307692307,
+ "grad_norm": 15784.1748046875,
+ "learning_rate": 8.266086590174684e-07,
+ "loss": 0.2376,
+ "step": 4500
+ },
+ {
+ "epoch": 174.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9254431016991443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6740679370339685,
+ "eval_f1_macro": 0.8075003208787752,
+ "eval_loss": 0.2703414559364319,
+ "eval_pr_auc": 0.6637127837233647,
+ "eval_precision": 0.6932515337423313,
+ "eval_precision_macro": 0.8147445669189726,
+ "eval_pred_class_0": 16734,
+ "eval_pred_class_1": 2934,
+ "eval_predicted_binding_ratio": 0.14917632702867603,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8007962916531735,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.53,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4524
+ },
+ {
+ "epoch": 175.0,
+ "eval_accuracy": 0.9001423632296115,
+ "eval_auc": 0.9255452150782025,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6746189529489728,
+ "eval_f1_macro": 0.8078199869849969,
+ "eval_loss": 0.27038928866386414,
+ "eval_pr_auc": 0.6639609479782242,
+ "eval_precision": 0.6936967632027258,
+ "eval_precision_macro": 0.8150250385068789,
+ "eval_pred_class_0": 16733,
+ "eval_pred_class_1": 2935,
+ "eval_predicted_binding_ratio": 0.14922717103925157,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8011489487528844,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.051,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4550
+ },
+ {
+ "epoch": 176.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9255934298780361,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6742838218248054,
+ "eval_f1_macro": 0.8076047153692607,
+ "eval_loss": 0.2702932059764862,
+ "eval_pr_auc": 0.6640830183725597,
+ "eval_precision": 0.6929884275017019,
+ "eval_precision_macro": 0.8146651641393745,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010584073150864,
+ "eval_runtime": 0.1918,
+ "eval_samples_per_second": 849.843,
+ "eval_steps_per_second": 5.214,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4576
+ },
+ {
+ "epoch": 177.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.9255964274877148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6745992397950752,
+ "eval_f1_macro": 0.8077217319600283,
+ "eval_loss": 0.27021023631095886,
+ "eval_pr_auc": 0.6640237853140233,
+ "eval_precision": 0.691864406779661,
+ "eval_precision_macro": 0.8142298466485935,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8016533355113369,
+ "eval_runtime": 0.2437,
+ "eval_samples_per_second": 668.962,
+ "eval_steps_per_second": 4.104,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4602
+ },
+ {
+ "epoch": 178.0,
+ "eval_accuracy": 0.900549115314216,
+ "eval_auc": 0.9257395146873824,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6756218905472637,
+ "eval_f1_macro": 0.8084468667292255,
+ "eval_loss": 0.27011793851852417,
+ "eval_pr_auc": 0.6647736112265655,
+ "eval_precision": 0.695459201092523,
+ "eval_precision_macro": 0.8159475347119822,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6568848758465011,
+ "eval_recall_macro": 0.801521450417969,
+ "eval_runtime": 0.2294,
+ "eval_samples_per_second": 710.491,
+ "eval_steps_per_second": 4.359,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4628
+ },
+ {
+ "epoch": 179.0,
+ "eval_accuracy": 0.9003965832824893,
+ "eval_auc": 0.9258087711499611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6755010766937221,
+ "eval_f1_macro": 0.8083352405901716,
+ "eval_loss": 0.2701837122440338,
+ "eval_pr_auc": 0.6649310182754444,
+ "eval_precision": 0.6944822888283378,
+ "eval_precision_macro": 0.8155055479522995,
+ "eval_pred_class_0": 16732,
+ "eval_pred_class_1": 2936,
+ "eval_predicted_binding_ratio": 0.14927801504982713,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.8016930246420839,
+ "eval_runtime": 0.1963,
+ "eval_samples_per_second": 830.526,
+ "eval_steps_per_second": 5.095,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4654
+ },
+ {
+ "epoch": 180.0,
+ "eval_accuracy": 0.9004474272930649,
+ "eval_auc": 0.9258260560681089,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6762566137566137,
+ "eval_f1_macro": 0.8087183092815753,
+ "eval_loss": 0.27005937695503235,
+ "eval_pr_auc": 0.6649666141685525,
+ "eval_precision": 0.6939260264675942,
+ "eval_precision_macro": 0.8153859544454471,
+ "eval_pred_class_0": 16721,
+ "eval_pred_class_1": 2947,
+ "eval_predicted_binding_ratio": 0.14983729916615823,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.802509552107089,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.51,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4680
+ },
+ {
+ "epoch": 181.0,
+ "eval_accuracy": 0.9005999593247915,
+ "eval_auc": 0.9259436830505047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6764851894754261,
+ "eval_f1_macro": 0.8088820685009666,
+ "eval_loss": 0.2699625492095947,
+ "eval_pr_auc": 0.6656768815498022,
+ "eval_precision": 0.6947654656696125,
+ "eval_precision_macro": 0.8157852199805673,
+ "eval_pred_class_0": 16726,
+ "eval_pred_class_1": 2942,
+ "eval_predicted_binding_ratio": 0.14958307911328045,
+ "eval_recall": 0.6591422121896162,
+ "eval_recall_macro": 0.8024690357139305,
+ "eval_runtime": 0.1825,
+ "eval_samples_per_second": 893.316,
+ "eval_steps_per_second": 5.48,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4706
+ },
+ {
+ "epoch": 182.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9259346220939755,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6787778695293146,
+ "eval_f1_macro": 0.8101680579881181,
+ "eval_loss": 0.2698967456817627,
+ "eval_pr_auc": 0.6657214869012321,
+ "eval_precision": 0.6956668923493569,
+ "eval_precision_macro": 0.8165423129929146,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6626894550145115,
+ "eval_recall_macro": 0.8042124766471122,
+ "eval_runtime": 0.2487,
+ "eval_samples_per_second": 655.302,
+ "eval_steps_per_second": 4.02,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4732
+ },
+ {
+ "epoch": 183.0,
+ "eval_accuracy": 0.9010575554199716,
+ "eval_auc": 0.9260415822575143,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6782407407407407,
+ "eval_f1_macro": 0.8098906179070202,
+ "eval_loss": 0.26987963914871216,
+ "eval_pr_auc": 0.6659658035928079,
+ "eval_precision": 0.6959619952494062,
+ "eval_precision_macro": 0.8165833539431051,
+ "eval_pred_class_0": 16721,
+ "eval_pred_class_1": 2947,
+ "eval_predicted_binding_ratio": 0.14983729916615823,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.8036580648440201,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.055,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4758
+ },
+ {
+ "epoch": 184.0,
+ "eval_accuracy": 0.9010575554199716,
+ "eval_auc": 0.9260998701937684,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6781343036718491,
+ "eval_f1_macro": 0.8098391554406106,
+ "eval_loss": 0.26986023783683777,
+ "eval_pr_auc": 0.6661468987350531,
+ "eval_precision": 0.6960950764006791,
+ "eval_precision_macro": 0.8166237506024205,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6610770719122864,
+ "eval_recall_macro": 0.8035270070130636,
+ "eval_runtime": 0.237,
+ "eval_samples_per_second": 687.681,
+ "eval_steps_per_second": 4.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4784
+ },
+ {
+ "epoch": 185.0,
+ "eval_accuracy": 0.9012609314622737,
+ "eval_auc": 0.9262103340569317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6787958981144558,
+ "eval_f1_macro": 0.810230030763446,
+ "eval_loss": 0.26986950635910034,
+ "eval_pr_auc": 0.6665273243194801,
+ "eval_precision": 0.6967741935483871,
+ "eval_precision_macro": 0.8170231070594294,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8039098445920405,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.338,
+ "eval_steps_per_second": 4.229,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4810
+ },
+ {
+ "epoch": 186.0,
+ "eval_accuracy": 0.9014134634940004,
+ "eval_auc": 0.9262454683781669,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6797687861271676,
+ "eval_f1_macro": 0.8107536578092345,
+ "eval_loss": 0.2697572410106659,
+ "eval_pr_auc": 0.666716804071224,
+ "eval_precision": 0.6966824644549763,
+ "eval_precision_macro": 0.8171398441695726,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6636568848758465,
+ "eval_recall_macro": 0.8047867330155776,
+ "eval_runtime": 0.1794,
+ "eval_samples_per_second": 908.74,
+ "eval_steps_per_second": 5.575,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4836
+ },
+ {
+ "epoch": 187.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9263210315000697,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6784592494627211,
+ "eval_f1_macro": 0.810014015033881,
+ "eval_loss": 0.26983824372291565,
+ "eval_pr_auc": 0.6669884807739552,
+ "eval_precision": 0.6960651289009498,
+ "eval_precision_macro": 0.8166629472255945,
+ "eval_pred_class_0": 16720,
+ "eval_pred_class_1": 2948,
+ "eval_predicted_binding_ratio": 0.14988814317673377,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8038193031542425,
+ "eval_runtime": 0.1795,
+ "eval_samples_per_second": 908.044,
+ "eval_steps_per_second": 5.571,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4862
+ },
+ {
+ "epoch": 188.0,
+ "eval_accuracy": 0.9012100874516982,
+ "eval_auc": 0.9264191350895575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6786836447825368,
+ "eval_f1_macro": 0.8101580079180191,
+ "eval_loss": 0.26967287063598633,
+ "eval_pr_auc": 0.667515048707415,
+ "eval_precision": 0.6965376782077393,
+ "eval_precision_macro": 0.8169029737767557,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8038796641127746,
+ "eval_runtime": 0.1851,
+ "eval_samples_per_second": 880.782,
+ "eval_steps_per_second": 5.404,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4888
+ },
+ {
+ "epoch": 189.0,
+ "eval_accuracy": 0.9013626194834249,
+ "eval_auc": 0.9265750497228504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6782752902155887,
+ "eval_f1_macro": 0.8100137635248964,
+ "eval_loss": 0.2697126567363739,
+ "eval_pr_auc": 0.6680073523436961,
+ "eval_precision": 0.698190508706043,
+ "eval_precision_macro": 0.8175521514197519,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.8030528007338771,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.766,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4914
+ },
+ {
+ "epoch": 190.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.9265310393625665,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6808651147432723,
+ "eval_f1_macro": 0.8113902183590456,
+ "eval_loss": 0.26952171325683594,
+ "eval_pr_auc": 0.6676611850618266,
+ "eval_precision": 0.6975642760487145,
+ "eval_precision_macro": 0.8176966904417818,
+ "eval_pred_class_0": 16712,
+ "eval_pred_class_1": 2956,
+ "eval_predicted_binding_ratio": 0.1502948952613382,
+ "eval_recall": 0.6649467913576266,
+ "eval_recall_macro": 0.8054920472149997,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.883,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4940
+ },
+ {
+ "epoch": 191.0,
+ "eval_accuracy": 0.901921903599756,
+ "eval_auc": 0.9266825451738318,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6812097174020822,
+ "eval_f1_macro": 0.8116278420268636,
+ "eval_loss": 0.2694892883300781,
+ "eval_pr_auc": 0.668268169384226,
+ "eval_precision": 0.6986440677966101,
+ "eval_precision_macro": 0.8182178348314311,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6646243147371815,
+ "eval_recall_macro": 0.8054817113011072,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.763,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4966
+ },
+ {
+ "epoch": 192.0,
+ "eval_accuracy": 0.9018710595891803,
+ "eval_auc": 0.9266840147811743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6814130075932651,
+ "eval_f1_macro": 0.8117083668893665,
+ "eval_loss": 0.26940062642097473,
+ "eval_pr_auc": 0.6682450098479531,
+ "eval_precision": 0.6980047345282381,
+ "eval_precision_macro": 0.817974900326174,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6655917445985166,
+ "eval_recall_macro": 0.8058447043147107,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.715,
+ "eval_steps_per_second": 3.949,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4992
+ },
+ {
+ "epoch": 192.30769230769232,
+ "grad_norm": 15858.0107421875,
+ "learning_rate": 7.72994743624204e-07,
+ "loss": 0.2316,
+ "step": 5000
+ },
+ {
+ "epoch": 193.0,
+ "eval_accuracy": 0.9021761236526337,
+ "eval_auc": 0.9268377687996988,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6816677696889477,
+ "eval_f1_macro": 0.8119380540142443,
+ "eval_loss": 0.26932862401008606,
+ "eval_pr_auc": 0.6692251134414691,
+ "eval_precision": 0.6999660210669385,
+ "eval_precision_macro": 0.8188619343002854,
+ "eval_pred_class_0": 16725,
+ "eval_pred_class_1": 2943,
+ "eval_predicted_binding_ratio": 0.149633923123856,
+ "eval_recall": 0.6643018381167365,
+ "eval_recall_macro": 0.8055015558664808,
+ "eval_runtime": 0.1767,
+ "eval_samples_per_second": 922.594,
+ "eval_steps_per_second": 5.66,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5018
+ },
+ {
+ "epoch": 194.0,
+ "eval_accuracy": 0.9022269676632093,
+ "eval_auc": 0.92693509378927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6815697963238947,
+ "eval_f1_macro": 0.8119084228038069,
+ "eval_loss": 0.2693846523761749,
+ "eval_pr_auc": 0.6695232673057094,
+ "eval_precision": 0.7004765146358066,
+ "eval_precision_macro": 0.8190667092007484,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6636568848758465,
+ "eval_recall_macro": 0.8052696206838338,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.258,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5044
+ },
+ {
+ "epoch": 195.0,
+ "eval_accuracy": 0.9022778116737848,
+ "eval_auc": 0.926939395553809,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6825239511067063,
+ "eval_f1_macro": 0.8123875088746679,
+ "eval_loss": 0.269380122423172,
+ "eval_pr_auc": 0.6693235837806535,
+ "eval_precision": 0.6996274974602099,
+ "eval_precision_macro": 0.8188535333546936,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6662366978394066,
+ "eval_recall_macro": 0.8063482638107518,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.818,
+ "eval_steps_per_second": 3.944,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5070
+ },
+ {
+ "epoch": 196.0,
+ "eval_accuracy": 0.9018202155786048,
+ "eval_auc": 0.9269277165550606,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6819304892110031,
+ "eval_f1_macro": 0.8119407443800393,
+ "eval_loss": 0.2693455219268799,
+ "eval_pr_auc": 0.6690854783865479,
+ "eval_precision": 0.696969696969697,
+ "eval_precision_macro": 0.8176128877709905,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6675266043211867,
+ "eval_recall_macro": 0.8066008708211837,
+ "eval_runtime": 0.2679,
+ "eval_samples_per_second": 608.505,
+ "eval_steps_per_second": 3.733,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5096
+ },
+ {
+ "epoch": 197.0,
+ "eval_accuracy": 0.9018202155786048,
+ "eval_auc": 0.926915803976337,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6826622843056697,
+ "eval_f1_macro": 0.8122944214527055,
+ "eval_loss": 0.2691311538219452,
+ "eval_pr_auc": 0.6692617138980786,
+ "eval_precision": 0.6960455764075067,
+ "eval_precision_macro": 0.8173347038115213,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6697839406643018,
+ "eval_recall_macro": 0.8075182756378791,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.485,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5122
+ },
+ {
+ "epoch": 198.0,
+ "eval_accuracy": 0.9023794996949359,
+ "eval_auc": 0.9270834170733764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6822244289970208,
+ "eval_f1_macro": 0.8122781903500151,
+ "eval_loss": 0.26926350593566895,
+ "eval_pr_auc": 0.6700425139918407,
+ "eval_precision": 0.7007820469228153,
+ "eval_precision_macro": 0.8193035600788525,
+ "eval_pred_class_0": 16727,
+ "eval_pred_class_1": 2941,
+ "eval_predicted_binding_ratio": 0.1495322351027049,
+ "eval_recall": 0.6646243147371815,
+ "eval_recall_macro": 0.8057533356145012,
+ "eval_runtime": 0.1796,
+ "eval_samples_per_second": 907.821,
+ "eval_steps_per_second": 5.569,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5148
+ },
+ {
+ "epoch": 199.0,
+ "eval_accuracy": 0.9023286556843604,
+ "eval_auc": 0.9271362061477199,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6822167080231596,
+ "eval_f1_macro": 0.812256712426767,
+ "eval_loss": 0.26931333541870117,
+ "eval_pr_auc": 0.6701662107301889,
+ "eval_precision": 0.7004076086956522,
+ "eval_precision_macro": 0.8191406615590195,
+ "eval_pred_class_0": 16724,
+ "eval_pred_class_1": 2944,
+ "eval_predicted_binding_ratio": 0.14968476713443157,
+ "eval_recall": 0.6649467913576266,
+ "eval_recall_macro": 0.8058542129661919,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.682,
+ "eval_steps_per_second": 3.931,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5174
+ },
+ {
+ "epoch": 200.0,
+ "eval_accuracy": 0.9019727476103315,
+ "eval_auc": 0.9271006630615285,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6823723228995058,
+ "eval_f1_macro": 0.8122076248057319,
+ "eval_loss": 0.26923447847366333,
+ "eval_pr_auc": 0.6700054234044599,
+ "eval_precision": 0.6975412596833951,
+ "eval_precision_macro": 0.8179304597716335,
+ "eval_pred_class_0": 16699,
+ "eval_pred_class_1": 2969,
+ "eval_predicted_binding_ratio": 0.15095586739882041,
+ "eval_recall": 0.6678490809416318,
+ "eval_recall_macro": 0.8068224700899382,
+ "eval_runtime": 0.2001,
+ "eval_samples_per_second": 814.446,
+ "eval_steps_per_second": 4.997,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5200
+ },
+ {
+ "epoch": 201.0,
+ "eval_accuracy": 0.9022269676632093,
+ "eval_auc": 0.9272154481542286,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6827256228345158,
+ "eval_f1_macro": 0.8124672441745833,
+ "eval_loss": 0.2691180408000946,
+ "eval_pr_auc": 0.6705128874375396,
+ "eval_precision": 0.6989864864864865,
+ "eval_precision_macro": 0.8186098340979236,
+ "eval_pred_class_0": 16708,
+ "eval_pred_class_1": 2960,
+ "eval_predicted_binding_ratio": 0.15049827130364044,
+ "eval_recall": 0.6672041277007417,
+ "eval_recall_macro": 0.8067112568243553,
+ "eval_runtime": 0.2326,
+ "eval_samples_per_second": 700.909,
+ "eval_steps_per_second": 4.3,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5226
+ },
+ {
+ "epoch": 202.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9272916730860608,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6847915636842973,
+ "eval_f1_macro": 0.8136435649304945,
+ "eval_loss": 0.2689346969127655,
+ "eval_pr_auc": 0.6709998700001464,
+ "eval_precision": 0.7001347708894878,
+ "eval_precision_macro": 0.8194386429297739,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6701064172847468,
+ "eval_recall_macro": 0.8081925820956238,
+ "eval_runtime": 0.1664,
+ "eval_samples_per_second": 979.381,
+ "eval_steps_per_second": 6.008,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5252
+ },
+ {
+ "epoch": 203.0,
+ "eval_accuracy": 0.9025828757372382,
+ "eval_auc": 0.9274415730349983,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6833057851239669,
+ "eval_f1_macro": 0.8128720237282395,
+ "eval_loss": 0.269077867269516,
+ "eval_pr_auc": 0.6716085952371595,
+ "eval_precision": 0.7009155645981688,
+ "eval_precision_macro": 0.819534880211639,
+ "eval_pred_class_0": 16719,
+ "eval_pred_class_1": 2949,
+ "eval_predicted_binding_ratio": 0.14993898718730933,
+ "eval_recall": 0.6665591744598517,
+ "eval_recall_macro": 0.8066604045173044,
+ "eval_runtime": 0.2556,
+ "eval_samples_per_second": 637.756,
+ "eval_steps_per_second": 3.913,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5278
+ },
+ {
+ "epoch": 204.0,
+ "eval_accuracy": 0.902837095790116,
+ "eval_auc": 0.9274026625041677,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6848095002474023,
+ "eval_f1_macro": 0.8136877723940104,
+ "eval_loss": 0.2691201865673065,
+ "eval_pr_auc": 0.6711363619469519,
+ "eval_precision": 0.700877785280216,
+ "eval_precision_macro": 0.8197612917781423,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6694614640438569,
+ "eval_recall_macro": 0.8079908273922429,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.544,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5304
+ },
+ {
+ "epoch": 205.0,
+ "eval_accuracy": 0.9029896278218426,
+ "eval_auc": 0.9274924350745481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6850445691647409,
+ "eval_f1_macro": 0.8138546963859644,
+ "eval_loss": 0.2691231071949005,
+ "eval_pr_auc": 0.6713794419677425,
+ "eval_precision": 0.7017247210010146,
+ "eval_precision_macro": 0.8201640180913157,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.8079503109990844,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.331,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5330
+ },
+ {
+ "epoch": 206.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9275284258556915,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6860062633921213,
+ "eval_f1_macro": 0.8143728753012186,
+ "eval_loss": 0.26890990138053894,
+ "eval_pr_auc": 0.6717808840440014,
+ "eval_precision": 0.7016183412002697,
+ "eval_precision_macro": 0.8202739053624388,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6710738471460819,
+ "eval_recall_macro": 0.8088271994226215,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.148,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5356
+ },
+ {
+ "epoch": 207.0,
+ "eval_accuracy": 0.9030913158429937,
+ "eval_auc": 0.9275997261430513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6854785478547855,
+ "eval_f1_macro": 0.8141000144010073,
+ "eval_loss": 0.2689792513847351,
+ "eval_pr_auc": 0.6718662790463032,
+ "eval_precision": 0.7019263264616424,
+ "eval_precision_macro": 0.8203209943398044,
+ "eval_pred_class_0": 16709,
+ "eval_pred_class_1": 2959,
+ "eval_predicted_binding_ratio": 0.15044742729306487,
+ "eval_recall": 0.6697839406643018,
+ "eval_recall_macro": 0.8082727876195295,
+ "eval_runtime": 0.195,
+ "eval_samples_per_second": 835.813,
+ "eval_steps_per_second": 5.128,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5382
+ },
+ {
+ "epoch": 208.0,
+ "eval_accuracy": 0.9032946918852959,
+ "eval_auc": 0.9276906860783045,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862421643022105,
+ "eval_f1_macro": 0.8145402081954642,
+ "eval_loss": 0.2688303291797638,
+ "eval_pr_auc": 0.6723694322774509,
+ "eval_precision": 0.7024653833164471,
+ "eval_precision_macro": 0.8206766373097469,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808786683029463,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.22,
+ "eval_steps_per_second": 3.848,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5408
+ },
+ {
+ "epoch": 209.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9277029684919884,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6851760039662865,
+ "eval_f1_macro": 0.8139715080669648,
+ "eval_loss": 0.2689387798309326,
+ "eval_pr_auc": 0.6722283267888528,
+ "eval_precision": 0.7027118644067797,
+ "eval_precision_macro": 0.8206106277411336,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6684940341825217,
+ "eval_recall_macro": 0.8077787367749694,
+ "eval_runtime": 0.2435,
+ "eval_samples_per_second": 669.46,
+ "eval_steps_per_second": 4.107,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5434
+ },
+ {
+ "epoch": 210.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9277600593308708,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6846370100876468,
+ "eval_f1_macro": 0.8136754097270521,
+ "eval_loss": 0.26892563700675964,
+ "eval_pr_auc": 0.6725269853749476,
+ "eval_precision": 0.7026476578411406,
+ "eval_precision_macro": 0.8204961767258567,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6675266043211867,
+ "eval_recall_macro": 0.807325202323568,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.763,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5460
+ },
+ {
+ "epoch": 211.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9278453744167288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.686128979053274,
+ "eval_f1_macro": 0.8144677293907912,
+ "eval_loss": 0.26878559589385986,
+ "eval_pr_auc": 0.6729846306066621,
+ "eval_precision": 0.7022282241728561,
+ "eval_precision_macro": 0.8205562286912407,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808756502550197,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.879,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5486
+ },
+ {
+ "epoch": 211.53846153846155,
+ "grad_norm": 16655.041015625,
+ "learning_rate": 7.144675667015729e-07,
+ "loss": 0.2259,
+ "step": 5500
+ },
+ {
+ "epoch": 212.0,
+ "eval_accuracy": 0.9028879398006915,
+ "eval_auc": 0.9279623006591996,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6845061116617113,
+ "eval_f1_macro": 0.8135588667797169,
+ "eval_loss": 0.26880088448524475,
+ "eval_pr_auc": 0.6734939312101108,
+ "eval_precision": 0.7016593294954284,
+ "eval_precision_macro": 0.8200489288817256,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6681715575620768,
+ "eval_recall_macro": 0.8074967765476829,
+ "eval_runtime": 0.2166,
+ "eval_samples_per_second": 752.433,
+ "eval_steps_per_second": 4.616,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5512
+ },
+ {
+ "epoch": 213.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9279055601902797,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6863359156090325,
+ "eval_f1_macro": 0.8145677594216373,
+ "eval_loss": 0.2687283456325531,
+ "eval_pr_auc": 0.6734074239428265,
+ "eval_precision": 0.7019554956169926,
+ "eval_precision_macro": 0.820472419105347,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6713963237665269,
+ "eval_recall_macro": 0.80901861821211,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.742,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5538
+ },
+ {
+ "epoch": 214.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9279595755594916,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6864392815949909,
+ "eval_f1_macro": 0.8146177229810407,
+ "eval_loss": 0.2687055766582489,
+ "eval_pr_auc": 0.6734235549479375,
+ "eval_precision": 0.7018194070080862,
+ "eval_precision_macro": 0.8204306615878754,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8091496760430665,
+ "eval_runtime": 0.225,
+ "eval_samples_per_second": 724.337,
+ "eval_steps_per_second": 4.444,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5564
+ },
+ {
+ "epoch": 215.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9280330753916157,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.685383980181668,
+ "eval_f1_macro": 0.8140720567955604,
+ "eval_loss": 0.2687467932701111,
+ "eval_pr_auc": 0.6737104569152422,
+ "eval_precision": 0.7024373730534867,
+ "eval_precision_macro": 0.8205258541706347,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.8080408524368825,
+ "eval_runtime": 0.1833,
+ "eval_samples_per_second": 889.114,
+ "eval_steps_per_second": 5.455,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5590
+ },
+ {
+ "epoch": 216.0,
+ "eval_accuracy": 0.9031930038641448,
+ "eval_auc": 0.9281060496687963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6860158311345647,
+ "eval_f1_macro": 0.8143952682963037,
+ "eval_loss": 0.26888203620910645,
+ "eval_pr_auc": 0.6738155361634312,
+ "eval_precision": 0.7019912251096861,
+ "eval_precision_macro": 0.8204358998939631,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808726322070931,
+ "eval_runtime": 0.1764,
+ "eval_samples_per_second": 923.938,
+ "eval_steps_per_second": 5.668,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5616
+ },
+ {
+ "epoch": 217.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9281046287239484,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6865425794761983,
+ "eval_f1_macro": 0.8146676522813128,
+ "eval_loss": 0.2686736285686493,
+ "eval_pr_auc": 0.6740322097472393,
+ "eval_precision": 0.7016835016835017,
+ "eval_precision_macro": 0.8203890020095554,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.672041277007417,
+ "eval_recall_macro": 0.809280733874023,
+ "eval_runtime": 0.2537,
+ "eval_samples_per_second": 642.37,
+ "eval_steps_per_second": 3.941,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5642
+ },
+ {
+ "epoch": 218.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9282425576991689,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862615587846763,
+ "eval_f1_macro": 0.814585106315415,
+ "eval_loss": 0.26872488856315613,
+ "eval_pr_auc": 0.6745604450622946,
+ "eval_precision": 0.7032148900169205,
+ "eval_precision_macro": 0.8210025266814094,
+ "eval_pred_class_0": 16713,
+ "eval_pred_class_1": 2955,
+ "eval_predicted_binding_ratio": 0.15024405125076265,
+ "eval_recall": 0.6701064172847468,
+ "eval_recall_macro": 0.808584928326082,
+ "eval_runtime": 0.2515,
+ "eval_samples_per_second": 648.077,
+ "eval_steps_per_second": 3.976,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5668
+ },
+ {
+ "epoch": 219.0,
+ "eval_accuracy": 0.9032946918852959,
+ "eval_auc": 0.9283728174652107,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862421643022105,
+ "eval_f1_macro": 0.8145402081954642,
+ "eval_loss": 0.26861947774887085,
+ "eval_pr_auc": 0.675175157595335,
+ "eval_precision": 0.7024653833164471,
+ "eval_precision_macro": 0.8206766373097469,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808786683029463,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 605.917,
+ "eval_steps_per_second": 3.717,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5694
+ },
+ {
+ "epoch": 220.0,
+ "eval_accuracy": 0.9035997559487492,
+ "eval_auc": 0.9283705984554486,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6875411997363217,
+ "eval_f1_macro": 0.8152747479984963,
+ "eval_loss": 0.2684967517852783,
+ "eval_pr_auc": 0.6752603675091132,
+ "eval_precision": 0.703067071115605,
+ "eval_precision_macro": 0.8211461336058236,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.809754112890798,
+ "eval_runtime": 0.2663,
+ "eval_samples_per_second": 612.018,
+ "eval_steps_per_second": 3.755,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5720
+ },
+ {
+ "epoch": 221.0,
+ "eval_accuracy": 0.9037014439699004,
+ "eval_auc": 0.9285080992007146,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6871489924017179,
+ "eval_f1_macro": 0.8151206773197821,
+ "eval_loss": 0.2685548961162567,
+ "eval_pr_auc": 0.6758164431668767,
+ "eval_precision": 0.7043684388757196,
+ "eval_precision_macro": 0.8216427895844347,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.809028126863591,
+ "eval_runtime": 0.1986,
+ "eval_samples_per_second": 820.602,
+ "eval_steps_per_second": 5.034,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5746
+ },
+ {
+ "epoch": 222.0,
+ "eval_accuracy": 0.9034980679275981,
+ "eval_auc": 0.9285004105265384,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6870052770448549,
+ "eval_f1_macro": 0.8149801571567146,
+ "eval_loss": 0.2685752809047699,
+ "eval_pr_auc": 0.6755687553750968,
+ "eval_precision": 0.7030037124535943,
+ "eval_precision_macro": 0.8210319370409247,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8093005784393965,
+ "eval_runtime": 0.2128,
+ "eval_samples_per_second": 765.976,
+ "eval_steps_per_second": 4.699,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5772
+ },
+ {
+ "epoch": 223.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9285113303903685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6871913072110636,
+ "eval_f1_macro": 0.815034532807023,
+ "eval_loss": 0.2685534358024597,
+ "eval_pr_auc": 0.675465436437485,
+ "eval_precision": 0.7019845274133871,
+ "eval_precision_macro": 0.8206238899420935,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8097644488046905,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.434,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5798
+ },
+ {
+ "epoch": 224.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9286790602773953,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.685742639761826,
+ "eval_f1_macro": 0.814334221653217,
+ "eval_loss": 0.2686038315296173,
+ "eval_pr_auc": 0.6763975611431872,
+ "eval_precision": 0.7039049235993209,
+ "eval_precision_macro": 0.8212163498580232,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6684940341825217,
+ "eval_recall_macro": 0.8079296391712996,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.051,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5824
+ },
+ {
+ "epoch": 225.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9286844910118134,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6857801944307135,
+ "eval_f1_macro": 0.8142280597608222,
+ "eval_loss": 0.268373966217041,
+ "eval_pr_auc": 0.6766849702960268,
+ "eval_precision": 0.7011455525606469,
+ "eval_precision_macro": 0.8200338541246348,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6710738471460819,
+ "eval_recall_macro": 0.8087668384640894,
+ "eval_runtime": 0.2618,
+ "eval_samples_per_second": 622.527,
+ "eval_steps_per_second": 3.819,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5850
+ },
+ {
+ "epoch": 226.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9286265437130228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6868321551865856,
+ "eval_f1_macro": 0.8147720454758898,
+ "eval_loss": 0.268480122089386,
+ "eval_pr_auc": 0.6762864798501788,
+ "eval_precision": 0.7005365526492288,
+ "eval_precision_macro": 0.8199434531195322,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8098756620702734,
+ "eval_runtime": 0.1801,
+ "eval_samples_per_second": 905.249,
+ "eval_steps_per_second": 5.554,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5876
+ },
+ {
+ "epoch": 227.0,
+ "eval_accuracy": 0.9034472239170226,
+ "eval_auc": 0.9286911091111043,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6878185106033208,
+ "eval_f1_macro": 0.8153554406082493,
+ "eval_loss": 0.2684793770313263,
+ "eval_pr_auc": 0.6763719243742072,
+ "eval_precision": 0.7015425888665325,
+ "eval_precision_macro": 0.8205363669491479,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.810449918438739,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.327,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5902
+ },
+ {
+ "epoch": 228.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9287447935753516,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6870580496628844,
+ "eval_f1_macro": 0.8149167860703537,
+ "eval_loss": 0.2684626877307892,
+ "eval_pr_auc": 0.6765555444970285,
+ "eval_precision": 0.701006711409396,
+ "eval_precision_macro": 0.8201821668264622,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8099360230288054,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.184,
+ "eval_steps_per_second": 3.952,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5928
+ },
+ {
+ "epoch": 229.0,
+ "eval_accuracy": 0.9037522879804759,
+ "eval_auc": 0.92885044958403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6877783275606135,
+ "eval_f1_macro": 0.815442675636767,
+ "eval_loss": 0.2684047222137451,
+ "eval_pr_auc": 0.6771563538797724,
+ "eval_precision": 0.7039162727886563,
+ "eval_precision_macro": 0.8215498998326138,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6723637536278619,
+ "eval_recall_macro": 0.8097135964976395,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.352,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5954
+ },
+ {
+ "epoch": 230.0,
+ "eval_accuracy": 0.9031930038641448,
+ "eval_auc": 0.9288052907888691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6866359447004609,
+ "eval_f1_macro": 0.814695001815053,
+ "eval_loss": 0.2683703601360321,
+ "eval_pr_auc": 0.6770692308922137,
+ "eval_precision": 0.7011764705882353,
+ "eval_precision_macro": 0.8201862703986524,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.80951266905667,
+ "eval_runtime": 0.1803,
+ "eval_samples_per_second": 903.896,
+ "eval_steps_per_second": 5.545,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5980
+ },
+ {
+ "epoch": 230.76923076923077,
+ "grad_norm": 18226.349609375,
+ "learning_rate": 6.520804793983146e-07,
+ "loss": 0.2213,
+ "step": 6000
+ },
+ {
+ "epoch": 231.0,
+ "eval_accuracy": 0.9036505999593248,
+ "eval_auc": 0.9289028104284194,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881685041961494,
+ "eval_f1_macro": 0.8155957226774667,
+ "eval_loss": 0.26838722825050354,
+ "eval_pr_auc": 0.677382862396771,
+ "eval_precision": 0.7026209677419355,
+ "eval_precision_macro": 0.821056469972094,
+ "eval_pred_class_0": 16692,
+ "eval_pred_class_1": 2976,
+ "eval_predicted_binding_ratio": 0.1513117754728493,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8104395825248465,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.676,
+ "eval_steps_per_second": 4.047,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6006
+ },
+ {
+ "epoch": 232.0,
+ "eval_accuracy": 0.9036505999593248,
+ "eval_auc": 0.9289413705892875,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882710972199375,
+ "eval_f1_macro": 0.8156453059542872,
+ "eval_loss": 0.2681979238986969,
+ "eval_pr_auc": 0.6777962434595076,
+ "eval_precision": 0.7024848891873741,
+ "eval_precision_macro": 0.8210147633474318,
+ "eval_pred_class_0": 16690,
+ "eval_pred_class_1": 2978,
+ "eval_predicted_binding_ratio": 0.1514134634940004,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.8105706403558031,
+ "eval_runtime": 0.2338,
+ "eval_samples_per_second": 697.187,
+ "eval_steps_per_second": 4.277,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6032
+ },
+ {
+ "epoch": 233.0,
+ "eval_accuracy": 0.9039556640227782,
+ "eval_auc": 0.9290857424788171,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6879233437964646,
+ "eval_f1_macro": 0.8155838213438953,
+ "eval_loss": 0.2683457136154175,
+ "eval_pr_auc": 0.6782535995592411,
+ "eval_precision": 0.7052845528455285,
+ "eval_precision_macro": 0.8221624965711252,
+ "eval_pred_class_0": 16716,
+ "eval_pred_class_1": 2952,
+ "eval_predicted_binding_ratio": 0.150091519219036,
+ "eval_recall": 0.6713963237665269,
+ "eval_recall_macro": 0.8094411449218342,
+ "eval_runtime": 0.2259,
+ "eval_samples_per_second": 721.682,
+ "eval_steps_per_second": 4.427,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6058
+ },
+ {
+ "epoch": 234.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.9291390181781085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881400726792204,
+ "eval_f1_macro": 0.8157063562723066,
+ "eval_loss": 0.26850852370262146,
+ "eval_pr_auc": 0.678201898193761,
+ "eval_precision": 0.7053843548933288,
+ "eval_precision_macro": 0.8222404873479507,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8096023832320567,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.187,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6084
+ },
+ {
+ "epoch": 235.0,
+ "eval_accuracy": 0.9037014439699004,
+ "eval_auc": 0.9291422688327602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.687767886580943,
+ "eval_f1_macro": 0.8154198615351363,
+ "eval_loss": 0.26855188608169556,
+ "eval_pr_auc": 0.6780321638936206,
+ "eval_precision": 0.7035413153456999,
+ "eval_precision_macro": 0.8213868942770528,
+ "eval_pred_class_0": 16703,
+ "eval_pred_class_1": 2965,
+ "eval_predicted_binding_ratio": 0.1507524913565182,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.80981447384933,
+ "eval_runtime": 0.1711,
+ "eval_samples_per_second": 952.835,
+ "eval_steps_per_second": 5.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6110
+ },
+ {
+ "epoch": 236.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9292183185796111,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881081972620815,
+ "eval_f1_macro": 0.8156376648859622,
+ "eval_loss": 0.2684246003627777,
+ "eval_pr_auc": 0.6783782374353945,
+ "eval_precision": 0.7042538825118163,
+ "eval_precision_macro": 0.8217486340608884,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.8099050152871281,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.985,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6136
+ },
+ {
+ "epoch": 237.0,
+ "eval_accuracy": 0.9039048200122025,
+ "eval_auc": 0.9291723227895398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891447368421053,
+ "eval_f1_macro": 0.8161564434751782,
+ "eval_loss": 0.2682003080844879,
+ "eval_pr_auc": 0.678474943261759,
+ "eval_precision": 0.7032561262168513,
+ "eval_precision_macro": 0.8214884501897367,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6755885198323122,
+ "eval_recall_macro": 0.8111147162450025,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.44,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6162
+ },
+ {
+ "epoch": 238.0,
+ "eval_accuracy": 0.9041081960545048,
+ "eval_auc": 0.9292599542101496,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891891891891891,
+ "eval_f1_macro": 0.8162490373023017,
+ "eval_loss": 0.2683660686016083,
+ "eval_pr_auc": 0.6787382085049865,
+ "eval_precision": 0.704752275025278,
+ "eval_precision_macro": 0.8221384271958916,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8107112068382407,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.396,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6188
+ },
+ {
+ "epoch": 239.0,
+ "eval_accuracy": 0.9039556640227782,
+ "eval_auc": 0.9292687329242089,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6886434811274106,
+ "eval_f1_macro": 0.8159319482645679,
+ "eval_loss": 0.2683408558368683,
+ "eval_pr_auc": 0.6788700814485856,
+ "eval_precision": 0.7043155765340526,
+ "eval_precision_macro": 0.8218620153057044,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8103585497385295,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.65,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6214
+ },
+ {
+ "epoch": 240.0,
+ "eval_accuracy": 0.9038031319910514,
+ "eval_auc": 0.9293738244479479,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6886109282422647,
+ "eval_f1_macro": 0.8158628904590758,
+ "eval_loss": 0.26828694343566895,
+ "eval_pr_auc": 0.6792947923537326,
+ "eval_precision": 0.7031932773109244,
+ "eval_precision_macro": 0.8213743898086402,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.8106611817936011,
+ "eval_runtime": 0.1692,
+ "eval_samples_per_second": 963.308,
+ "eval_steps_per_second": 5.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6240
+ },
+ {
+ "epoch": 241.0,
+ "eval_accuracy": 0.9039048200122025,
+ "eval_auc": 0.929388987681323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891447368421053,
+ "eval_f1_macro": 0.8161564434751782,
+ "eval_loss": 0.26832982897758484,
+ "eval_pr_auc": 0.679377997729221,
+ "eval_precision": 0.7032561262168513,
+ "eval_precision_macro": 0.8214884501897367,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6755885198323122,
+ "eval_recall_macro": 0.8111147162450025,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.159,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6266
+ },
+ {
+ "epoch": 242.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.9294373582011398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891669410602568,
+ "eval_f1_macro": 0.8162027357577154,
+ "eval_loss": 0.26806166768074036,
+ "eval_pr_auc": 0.6798375856033828,
+ "eval_precision": 0.7040026908846283,
+ "eval_precision_macro": 0.8218126661970311,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6749435665914221,
+ "eval_recall_macro": 0.8109129615416215,
+ "eval_runtime": 0.2508,
+ "eval_samples_per_second": 649.948,
+ "eval_steps_per_second": 3.987,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6292
+ },
+ {
+ "epoch": 243.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.929443917905437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6897798225435425,
+ "eval_f1_macro": 0.8164989338281623,
+ "eval_loss": 0.26814863085746765,
+ "eval_pr_auc": 0.6797263827568155,
+ "eval_precision": 0.7031825795644892,
+ "eval_precision_macro": 0.8215607197408852,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6768784263140922,
+ "eval_recall_macro": 0.8116993085273606,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 642.959,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6318
+ },
+ {
+ "epoch": 244.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9295684160320964,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882110469909316,
+ "eval_f1_macro": 0.8156873815700654,
+ "eval_loss": 0.26816216111183167,
+ "eval_pr_auc": 0.6803755613590039,
+ "eval_precision": 0.7041160593792173,
+ "eval_precision_macro": 0.8217060181953557,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8100360731180846,
+ "eval_runtime": 0.2375,
+ "eval_samples_per_second": 686.328,
+ "eval_steps_per_second": 4.211,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6344
+ },
+ {
+ "epoch": 245.0,
+ "eval_accuracy": 0.9041081960545048,
+ "eval_auc": 0.9295755402213329,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6888815572418344,
+ "eval_f1_macro": 0.8161003326270482,
+ "eval_loss": 0.2682454288005829,
+ "eval_pr_auc": 0.6803390373338819,
+ "eval_precision": 0.7051671732522796,
+ "eval_precision_macro": 0.8222669528798059,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6733311834891971,
+ "eval_recall_macro": 0.8103180333453712,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.048,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6370
+ },
+ {
+ "epoch": 246.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9296204070415253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882110469909316,
+ "eval_f1_macro": 0.8156873815700654,
+ "eval_loss": 0.2682053744792938,
+ "eval_pr_auc": 0.6804463740899893,
+ "eval_precision": 0.7041160593792173,
+ "eval_precision_macro": 0.8217060181953557,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8100360731180846,
+ "eval_runtime": 0.2411,
+ "eval_samples_per_second": 676.2,
+ "eval_steps_per_second": 4.148,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6396
+ },
+ {
+ "epoch": 247.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9296705294111545,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909270541742137,
+ "eval_f1_macro": 0.817248994423186,
+ "eval_loss": 0.2681281566619873,
+ "eval_pr_auc": 0.6807214505356617,
+ "eval_precision": 0.7059219380888291,
+ "eval_precision_macro": 0.8229238344013863,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6765559496936472,
+ "eval_recall_macro": 0.8119002359683303,
+ "eval_runtime": 0.1963,
+ "eval_samples_per_second": 830.371,
+ "eval_steps_per_second": 5.094,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6422
+ },
+ {
+ "epoch": 248.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9296871135893773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912442396313364,
+ "eval_f1_macro": 0.8174200753177728,
+ "eval_loss": 0.26824310421943665,
+ "eval_pr_auc": 0.6805326752113899,
+ "eval_precision": 0.7058823529411765,
+ "eval_precision_macro": 0.8229585490219571,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.8121925321095093,
+ "eval_runtime": 0.2645,
+ "eval_samples_per_second": 616.162,
+ "eval_steps_per_second": 3.78,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6448
+ },
+ {
+ "epoch": 249.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.929768866580617,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6914841047603361,
+ "eval_f1_macro": 0.8175893393183914,
+ "eval_loss": 0.2681940495967865,
+ "eval_pr_auc": 0.6810052289277716,
+ "eval_precision": 0.7067340067340068,
+ "eval_precision_macro": 0.8233634101223034,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6768784263140922,
+ "eval_recall_macro": 0.8121520157163508,
+ "eval_runtime": 0.2396,
+ "eval_samples_per_second": 680.397,
+ "eval_steps_per_second": 4.174,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6474
+ },
+ {
+ "epoch": 250.0,
+ "grad_norm": 35924.55078125,
+ "learning_rate": 5.869563021464528e-07,
+ "loss": 0.2171,
+ "step": 6500
+ },
+ {
+ "epoch": 250.0,
+ "eval_accuracy": 0.9048200122025626,
+ "eval_auc": 0.9298367215633461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912928759894459,
+ "eval_f1_macro": 0.817514675551828,
+ "eval_loss": 0.2681121826171875,
+ "eval_pr_auc": 0.681415067318076,
+ "eval_precision": 0.7073911576105298,
+ "eval_precision_macro": 0.8236147646777582,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8117890227027473,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.453,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6500
+ },
+ {
+ "epoch": 251.0,
+ "eval_accuracy": 0.9046674801708359,
+ "eval_auc": 0.9298684105799504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909510466457887,
+ "eval_f1_macro": 0.8172961371074986,
+ "eval_loss": 0.2681705951690674,
+ "eval_pr_auc": 0.6814913977659953,
+ "eval_precision": 0.7066756574511126,
+ "eval_precision_macro": 0.8232516115060616,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8116984812649493,
+ "eval_runtime": 0.271,
+ "eval_samples_per_second": 601.566,
+ "eval_steps_per_second": 3.691,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6526
+ },
+ {
+ "epoch": 252.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9300244614682288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6893926857521099,
+ "eval_f1_macro": 0.8165072340543806,
+ "eval_loss": 0.2683667540550232,
+ "eval_pr_auc": 0.6820860933741758,
+ "eval_precision": 0.7080217539089055,
+ "eval_precision_macro": 0.8235792136757251,
+ "eval_pred_class_0": 16726,
+ "eval_pred_class_1": 2942,
+ "eval_predicted_binding_ratio": 0.14958307911328045,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8099343685039828,
+ "eval_runtime": 0.2316,
+ "eval_samples_per_second": 703.782,
+ "eval_steps_per_second": 4.318,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6552
+ },
+ {
+ "epoch": 253.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9299541441632636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6910408432147562,
+ "eval_f1_macro": 0.8173217684087248,
+ "eval_loss": 0.26824095845222473,
+ "eval_pr_auc": 0.6816317339832768,
+ "eval_precision": 0.7061595422416694,
+ "eval_precision_macro": 0.8230444354317887,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6765559496936472,
+ "eval_recall_macro": 0.8119304164475962,
+ "eval_runtime": 0.2652,
+ "eval_samples_per_second": 614.632,
+ "eval_steps_per_second": 3.771,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6578
+ },
+ {
+ "epoch": 254.0,
+ "eval_accuracy": 0.9044641041285336,
+ "eval_auc": 0.9299257349988078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6913093477903729,
+ "eval_f1_macro": 0.8173981849782266,
+ "eval_loss": 0.2680823504924774,
+ "eval_pr_auc": 0.6815799614400636,
+ "eval_precision": 0.7046215673141326,
+ "eval_precision_macro": 0.8224282755645115,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6784908094163173,
+ "eval_recall_macro": 0.8126262219955371,
+ "eval_runtime": 0.1721,
+ "eval_samples_per_second": 947.131,
+ "eval_steps_per_second": 5.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6604
+ },
+ {
+ "epoch": 255.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9300487927156216,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6902129064202014,
+ "eval_f1_macro": 0.8169037907104764,
+ "eval_loss": 0.2682023346424103,
+ "eval_pr_auc": 0.6819970017971004,
+ "eval_precision": 0.7068965517241379,
+ "eval_precision_macro": 0.8232268515652408,
+ "eval_pred_class_0": 16710,
+ "eval_pred_class_1": 2958,
+ "eval_predicted_binding_ratio": 0.1503965832824893,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8109828311516347,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.732,
+ "eval_steps_per_second": 3.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6630
+ },
+ {
+ "epoch": 256.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9300701263533355,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6908371786420567,
+ "eval_f1_macro": 0.8172233266061071,
+ "eval_loss": 0.26801708340644836,
+ "eval_pr_auc": 0.6822973012887885,
+ "eval_precision": 0.7064374789349511,
+ "eval_precision_macro": 0.8231307207859595,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8116683007856833,
+ "eval_runtime": 0.2302,
+ "eval_samples_per_second": 708.13,
+ "eval_steps_per_second": 4.344,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6656
+ },
+ {
+ "epoch": 257.0,
+ "eval_accuracy": 0.9048708562131381,
+ "eval_auc": 0.9301923470752391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6911012052171042,
+ "eval_f1_macro": 0.8174397819709127,
+ "eval_loss": 0.26805296540260315,
+ "eval_pr_auc": 0.6828138126269635,
+ "eval_precision": 0.7080514208389715,
+ "eval_precision_macro": 0.8238677400987582,
+ "eval_pred_class_0": 16712,
+ "eval_pred_class_1": 2956,
+ "eval_predicted_binding_ratio": 0.1502948952613382,
+ "eval_recall": 0.6749435665914221,
+ "eval_recall_macro": 0.8114260296891438,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.189,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6682
+ },
+ {
+ "epoch": 258.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9301935928351055,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912806988626998,
+ "eval_f1_macro": 0.8174910212279173,
+ "eval_loss": 0.26811105012893677,
+ "eval_pr_auc": 0.6827059930276215,
+ "eval_precision": 0.7070128118678355,
+ "eval_precision_macro": 0.8234501252489699,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6762334730732021,
+ "eval_recall_macro": 0.8118899000544377,
+ "eval_runtime": 0.2455,
+ "eval_samples_per_second": 663.913,
+ "eval_steps_per_second": 4.073,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6708
+ },
+ {
+ "epoch": 259.0,
+ "eval_accuracy": 0.9049217002237137,
+ "eval_auc": 0.9302954726341887,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6913172664245626,
+ "eval_f1_macro": 0.817561992789179,
+ "eval_loss": 0.2680983245372772,
+ "eval_pr_auc": 0.6832649652047296,
+ "eval_precision": 0.7081501521812648,
+ "eval_precision_macro": 0.8239452215038332,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6752660432118671,
+ "eval_recall_macro": 0.8115872679993663,
+ "eval_runtime": 0.2456,
+ "eval_samples_per_second": 663.63,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6734
+ },
+ {
+ "epoch": 260.0,
+ "eval_accuracy": 0.9048200122025626,
+ "eval_auc": 0.9302687082620565,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6920039486673247,
+ "eval_f1_macro": 0.8178583697873878,
+ "eval_loss": 0.2680213451385498,
+ "eval_pr_auc": 0.6832868902825516,
+ "eval_precision": 0.7064158548874706,
+ "eval_precision_macro": 0.8233115761166728,
+ "eval_pred_class_0": 16691,
+ "eval_pred_class_1": 2977,
+ "eval_predicted_binding_ratio": 0.15136261948342486,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8127064275194428,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.797,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6760
+ },
+ {
+ "epoch": 261.0,
+ "eval_accuracy": 0.9049725442342892,
+ "eval_auc": 0.9303376240871719,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.691635043722158,
+ "eval_f1_macro": 0.817733374603378,
+ "eval_loss": 0.2681059241294861,
+ "eval_pr_auc": 0.6834436649713198,
+ "eval_precision": 0.7081081081081081,
+ "eval_precision_macro": 0.8239786410782342,
+ "eval_pred_class_0": 16708,
+ "eval_pred_class_1": 2960,
+ "eval_predicted_binding_ratio": 0.15049827130364044,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8118795641405453,
+ "eval_runtime": 0.1861,
+ "eval_samples_per_second": 875.956,
+ "eval_steps_per_second": 5.374,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6786
+ },
+ {
+ "epoch": 262.0,
+ "eval_accuracy": 0.9049725442342892,
+ "eval_auc": 0.9303520963131211,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6920415224913494,
+ "eval_f1_macro": 0.8179298603528982,
+ "eval_loss": 0.2680259048938751,
+ "eval_pr_auc": 0.683712511498021,
+ "eval_precision": 0.7075471698113207,
+ "eval_precision_macro": 0.8238035250254208,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.8124037954643712,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.771,
+ "eval_steps_per_second": 3.931,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6812
+ },
+ {
+ "epoch": 263.0,
+ "eval_accuracy": 0.9051759202765914,
+ "eval_auc": 0.9303956005834594,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.692700609655627,
+ "eval_f1_macro": 0.8183195235731167,
+ "eval_loss": 0.2679544985294342,
+ "eval_pr_auc": 0.6840791766505604,
+ "eval_precision": 0.7082210242587601,
+ "eval_precision_macro": 0.8242003324886615,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6778458561754273,
+ "eval_recall_macro": 0.8127866330433483,
+ "eval_runtime": 0.224,
+ "eval_samples_per_second": 727.59,
+ "eval_steps_per_second": 4.464,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6838
+ },
+ {
+ "epoch": 264.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9304257713302264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933465085638999,
+ "eval_f1_macro": 0.8186850387937344,
+ "eval_loss": 0.26790833473205566,
+ "eval_pr_auc": 0.6843453300290927,
+ "eval_precision": 0.70851565129586,
+ "eval_precision_macro": 0.8244321084532245,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.8132703479740159,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.122,
+ "eval_steps_per_second": 3.866,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6864
+ },
+ {
+ "epoch": 265.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9304709495903853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6934474810668423,
+ "eval_f1_macro": 0.8187338421508825,
+ "eval_loss": 0.26806485652923584,
+ "eval_pr_auc": 0.684236710788699,
+ "eval_precision": 0.7083753784056509,
+ "eval_precision_macro": 0.8243883480827296,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6791357626572073,
+ "eval_recall_macro": 0.8134014058049723,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 646.986,
+ "eval_steps_per_second": 3.969,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6890
+ },
+ {
+ "epoch": 266.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9305072226139984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6937767533750412,
+ "eval_f1_macro": 0.8189285426426647,
+ "eval_loss": 0.2682030200958252,
+ "eval_pr_auc": 0.6841419811140891,
+ "eval_precision": 0.708711738984191,
+ "eval_precision_macro": 0.8245864774585525,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6794582392776524,
+ "eval_recall_macro": 0.8135928245944608,
+ "eval_runtime": 0.1928,
+ "eval_samples_per_second": 845.449,
+ "eval_steps_per_second": 5.187,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6916
+ },
+ {
+ "epoch": 267.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9304825799266392,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6942937016938003,
+ "eval_f1_macro": 0.8191961667392471,
+ "eval_loss": 0.2681148052215576,
+ "eval_pr_auc": 0.6840537331785722,
+ "eval_precision": 0.7083892617449664,
+ "eval_precision_macro": 0.824532598274209,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6807481457594324,
+ "eval_recall_macro": 0.8141472363975528,
+ "eval_runtime": 0.2454,
+ "eval_samples_per_second": 664.157,
+ "eval_steps_per_second": 4.075,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6942
+ },
+ {
+ "epoch": 268.0,
+ "eval_accuracy": 0.9057860484034981,
+ "eval_auc": 0.930527787384295,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6951801283105774,
+ "eval_f1_macro": 0.8197312675109731,
+ "eval_loss": 0.2679014503955841,
+ "eval_pr_auc": 0.6848281199608002,
+ "eval_precision": 0.7095366017461383,
+ "eval_precision_macro": 0.8251697388598875,
+ "eval_pred_class_0": 16690,
+ "eval_pred_class_1": 2978,
+ "eval_predicted_binding_ratio": 0.1514134634940004,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.8145904349350619,
+ "eval_runtime": 0.2174,
+ "eval_samples_per_second": 749.726,
+ "eval_steps_per_second": 4.6,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6968
+ },
+ {
+ "epoch": 269.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9305513205667733,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6949375410913873,
+ "eval_f1_macro": 0.8195606747920547,
+ "eval_loss": 0.26782992482185364,
+ "eval_pr_auc": 0.6850252128050689,
+ "eval_precision": 0.7086825343613812,
+ "eval_precision_macro": 0.8247638023919581,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8146309513282204,
+ "eval_runtime": 0.2304,
+ "eval_samples_per_second": 707.619,
+ "eval_steps_per_second": 4.341,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6994
+ },
+ {
+ "epoch": 269.2307692307692,
+ "grad_norm": 17604.1328125,
+ "learning_rate": 5.202671165416819e-07,
+ "loss": 0.2132,
+ "step": 7000
+ },
+ {
+ "epoch": 270.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9305936180072407,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6950377916529741,
+ "eval_f1_macro": 0.8196091213903969,
+ "eval_loss": 0.2680298984050751,
+ "eval_pr_auc": 0.6846741481205671,
+ "eval_precision": 0.7085427135678392,
+ "eval_precision_macro": 0.8247203168031008,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.814762009159177,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.148,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7020
+ },
+ {
+ "epoch": 271.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9306496966662317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6938909929194796,
+ "eval_f1_macro": 0.8190015347004277,
+ "eval_loss": 0.2681294083595276,
+ "eval_pr_auc": 0.6847333752342389,
+ "eval_precision": 0.7089502018842531,
+ "eval_precision_macro": 0.8247074919339809,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6794582392776524,
+ "eval_recall_macro": 0.8136230050737269,
+ "eval_runtime": 0.1821,
+ "eval_samples_per_second": 895.335,
+ "eval_steps_per_second": 5.493,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7046
+ },
+ {
+ "epoch": 272.0,
+ "eval_accuracy": 0.9053792963188937,
+ "eval_auc": 0.9306815900653141,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6932586121641668,
+ "eval_f1_macro": 0.8186603259504293,
+ "eval_loss": 0.26823949813842773,
+ "eval_pr_auc": 0.6847385438827486,
+ "eval_precision": 0.7090357383681726,
+ "eval_precision_macro": 0.8246412077064189,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8130384127913689,
+ "eval_runtime": 0.1793,
+ "eval_samples_per_second": 909.129,
+ "eval_steps_per_second": 5.577,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7072
+ },
+ {
+ "epoch": 273.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9307212597310633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6931443638760711,
+ "eval_f1_macro": 0.8185873316314347,
+ "eval_loss": 0.2681174576282501,
+ "eval_pr_auc": 0.6852124592316542,
+ "eval_precision": 0.7087967644084934,
+ "eval_precision_macro": 0.8245199318120546,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8130082323121028,
+ "eval_runtime": 0.1693,
+ "eval_samples_per_second": 962.904,
+ "eval_steps_per_second": 5.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7098
+ },
+ {
+ "epoch": 274.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9307878592214269,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6947368421052632,
+ "eval_f1_macro": 0.8194636820581644,
+ "eval_loss": 0.2679351270198822,
+ "eval_pr_auc": 0.685659065605018,
+ "eval_precision": 0.7089627391742196,
+ "eval_precision_macro": 0.8248510741829513,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6810706223798775,
+ "eval_recall_macro": 0.8143688356663075,
+ "eval_runtime": 0.2474,
+ "eval_samples_per_second": 658.933,
+ "eval_steps_per_second": 4.043,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7124
+ },
+ {
+ "epoch": 275.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9308354511413273,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6945496459739832,
+ "eval_f1_macro": 0.8193909880953703,
+ "eval_loss": 0.2679973542690277,
+ "eval_pr_auc": 0.6858486839853987,
+ "eval_precision": 0.7096231493943472,
+ "eval_precision_macro": 0.8251038602745574,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.8140058426527039,
+ "eval_runtime": 0.1976,
+ "eval_samples_per_second": 825.097,
+ "eval_steps_per_second": 5.062,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7150
+ },
+ {
+ "epoch": 276.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9308506727696961,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.694320987654321,
+ "eval_f1_macro": 0.819244917025501,
+ "eval_loss": 0.26807495951652527,
+ "eval_pr_auc": 0.6856600891550617,
+ "eval_precision": 0.7091459314055144,
+ "eval_precision_macro": 0.8248616921913159,
+ "eval_pred_class_0": 16694,
+ "eval_pred_class_1": 2974,
+ "eval_predicted_binding_ratio": 0.1512100874516982,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.8139454816941719,
+ "eval_runtime": 0.2211,
+ "eval_samples_per_second": 737.174,
+ "eval_steps_per_second": 4.523,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7176
+ },
+ {
+ "epoch": 277.0,
+ "eval_accuracy": 0.9057352043929225,
+ "eval_auc": 0.9308575828439557,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6946640316205533,
+ "eval_f1_macro": 0.8194640504423113,
+ "eval_loss": 0.26810142397880554,
+ "eval_pr_auc": 0.6855507532370865,
+ "eval_precision": 0.709861999326826,
+ "eval_precision_macro": 0.8252250644654733,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.81403602313197,
+ "eval_runtime": 0.2072,
+ "eval_samples_per_second": 786.583,
+ "eval_steps_per_second": 4.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7202
+ },
+ {
+ "epoch": 278.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9309145374278527,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6941467436108821,
+ "eval_f1_macro": 0.8191962415719043,
+ "eval_loss": 0.26818612217903137,
+ "eval_pr_auc": 0.6856281742604067,
+ "eval_precision": 0.7101889338731444,
+ "eval_precision_macro": 0.8252812485457677,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.813481611328878,
+ "eval_runtime": 0.1872,
+ "eval_samples_per_second": 870.561,
+ "eval_steps_per_second": 5.341,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7228
+ },
+ {
+ "epoch": 279.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9309855846702396,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.69433465085639,
+ "eval_f1_macro": 0.8192692975301671,
+ "eval_loss": 0.2680346667766571,
+ "eval_pr_auc": 0.686232632159634,
+ "eval_precision": 0.7095254123190845,
+ "eval_precision_macro": 0.825026825462411,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6797807158980974,
+ "eval_recall_macro": 0.8138446043424814,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.212,
+ "eval_steps_per_second": 4.149,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7254
+ },
+ {
+ "epoch": 280.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.930922634866985,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6957377049180328,
+ "eval_f1_macro": 0.8199473215888755,
+ "eval_loss": 0.26806843280792236,
+ "eval_pr_auc": 0.6857403383581059,
+ "eval_precision": 0.70756918972991,
+ "eval_precision_macro": 0.8244187060893835,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8156794139758723,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.751,
+ "eval_steps_per_second": 5.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7280
+ },
+ {
+ "epoch": 281.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9309180411274773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6959227116423776,
+ "eval_f1_macro": 0.8200188959217034,
+ "eval_loss": 0.2680058181285858,
+ "eval_pr_auc": 0.6857513786045211,
+ "eval_precision": 0.7069194943446441,
+ "eval_precision_macro": 0.8241715464761271,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8160424069894758,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.248,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7306
+ },
+ {
+ "epoch": 282.0,
+ "eval_accuracy": 0.9055318283506203,
+ "eval_auc": 0.930996368279084,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6950098489822718,
+ "eval_f1_macro": 0.8195600321797414,
+ "eval_loss": 0.2679016888141632,
+ "eval_pr_auc": 0.6864832067617813,
+ "eval_precision": 0.7077900367769977,
+ "eval_precision_macro": 0.824393309448042,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.814963763862558,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.664,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7332
+ },
+ {
+ "epoch": 283.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9310520771031147,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695152013147083,
+ "eval_f1_macro": 0.8196821086456596,
+ "eval_loss": 0.2679460644721985,
+ "eval_pr_auc": 0.6867185372000254,
+ "eval_precision": 0.7087801608579088,
+ "eval_precision_macro": 0.8248408116684653,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.8147921896384429,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.949,
+ "eval_steps_per_second": 4.994,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7358
+ },
+ {
+ "epoch": 284.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.931065877786636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695152013147083,
+ "eval_f1_macro": 0.8196821086456596,
+ "eval_loss": 0.26795604825019836,
+ "eval_pr_auc": 0.6868264245068394,
+ "eval_precision": 0.7087801608579088,
+ "eval_precision_macro": 0.8248408116684653,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.8147921896384429,
+ "eval_runtime": 0.2632,
+ "eval_samples_per_second": 619.283,
+ "eval_steps_per_second": 3.799,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7384
+ },
+ {
+ "epoch": 285.0,
+ "eval_accuracy": 0.9057352043929225,
+ "eval_auc": 0.9310428312291054,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696165191740413,
+ "eval_f1_macro": 0.8201894743681304,
+ "eval_loss": 0.2679717540740967,
+ "eval_pr_auc": 0.6865300625458848,
+ "eval_precision": 0.7077640786404532,
+ "eval_precision_macro": 0.8245726255085029,
+ "eval_pred_class_0": 16667,
+ "eval_pred_class_1": 3001,
+ "eval_predicted_binding_ratio": 0.15258287573723817,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8160018905963173,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.133,
+ "eval_steps_per_second": 4.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7410
+ },
+ {
+ "epoch": 286.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9311142093764568,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6946815495732108,
+ "eval_f1_macro": 0.8193658018591599,
+ "eval_loss": 0.2679450213909149,
+ "eval_pr_auc": 0.6869273130451355,
+ "eval_precision": 0.7074557004346372,
+ "eval_precision_macro": 0.8241961598653369,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8147723450730694,
+ "eval_runtime": 0.1793,
+ "eval_samples_per_second": 908.844,
+ "eval_steps_per_second": 5.576,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7436
+ },
+ {
+ "epoch": 287.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9311615190538873,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695695037791653,
+ "eval_f1_macro": 0.8199978948356761,
+ "eval_loss": 0.2679760158061981,
+ "eval_pr_auc": 0.6872164392300586,
+ "eval_precision": 0.7092127303182579,
+ "eval_precision_macro": 0.8251152664358777,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.8151448467381539,
+ "eval_runtime": 0.1776,
+ "eval_samples_per_second": 917.947,
+ "eval_steps_per_second": 5.632,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7462
+ },
+ {
+ "epoch": 288.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9311948528628156,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6959237343852729,
+ "eval_f1_macro": 0.8201439915761322,
+ "eval_loss": 0.26804664731025696,
+ "eval_pr_auc": 0.6873402512916988,
+ "eval_precision": 0.7096882333221589,
+ "eval_precision_macro": 0.8253565529811274,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.815205207696686,
+ "eval_runtime": 0.2679,
+ "eval_samples_per_second": 608.503,
+ "eval_steps_per_second": 3.733,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7488
+ },
+ {
+ "epoch": 288.46153846153845,
+ "grad_norm": 18250.5078125,
+ "learning_rate": 4.5321317063898914e-07,
+ "loss": 0.2101,
+ "step": 7500
+ },
+ {
+ "epoch": 289.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9312627857055362,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6958949096880132,
+ "eval_f1_macro": 0.8200944800500465,
+ "eval_loss": 0.26791396737098694,
+ "eval_pr_auc": 0.6879250403674073,
+ "eval_precision": 0.7089327534292406,
+ "eval_precision_macro": 0.8250281609942534,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.8154069624000669,
+ "eval_runtime": 0.2277,
+ "eval_samples_per_second": 715.925,
+ "eval_steps_per_second": 4.392,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7514
+ },
+ {
+ "epoch": 290.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9312241087546806,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696522309711286,
+ "eval_f1_macro": 0.8204332366847645,
+ "eval_loss": 0.26787513494491577,
+ "eval_pr_auc": 0.6878638814996979,
+ "eval_precision": 0.7088480801335559,
+ "eval_precision_macro": 0.8250951850316913,
+ "eval_pred_class_0": 16673,
+ "eval_pred_class_1": 2995,
+ "eval_predicted_binding_ratio": 0.15227781167378482,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.815991554682425,
+ "eval_runtime": 0.2212,
+ "eval_samples_per_second": 736.836,
+ "eval_steps_per_second": 4.52,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7540
+ },
+ {
+ "epoch": 291.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9312868249779602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975369458128079,
+ "eval_f1_macro": 0.8210658921448086,
+ "eval_loss": 0.26807519793510437,
+ "eval_pr_auc": 0.6876156626538744,
+ "eval_precision": 0.7106055536968886,
+ "eval_precision_macro": 0.8260144502101566,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8163640563475095,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.522,
+ "eval_steps_per_second": 3.874,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7566
+ },
+ {
+ "epoch": 292.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9313246454689077,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971935007385525,
+ "eval_f1_macro": 0.8208465473190101,
+ "eval_loss": 0.26810526847839355,
+ "eval_pr_auc": 0.6877564948921628,
+ "eval_precision": 0.7098930481283422,
+ "eval_precision_macro": 0.8256529284776994,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8162735149097116,
+ "eval_runtime": 0.206,
+ "eval_samples_per_second": 791.322,
+ "eval_steps_per_second": 4.855,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7592
+ },
+ {
+ "epoch": 293.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9313187962370344,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6969647251845775,
+ "eval_f1_macro": 0.8207004065741184,
+ "eval_loss": 0.26795074343681335,
+ "eval_pr_auc": 0.6879453119558532,
+ "eval_precision": 0.7094188376753507,
+ "eval_precision_macro": 0.8254123095657551,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8162131539511794,
+ "eval_runtime": 0.2264,
+ "eval_samples_per_second": 719.954,
+ "eval_steps_per_second": 4.417,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7618
+ },
+ {
+ "epoch": 294.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9313842959550158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966809069996713,
+ "eval_f1_macro": 0.820581055003595,
+ "eval_loss": 0.2678394019603729,
+ "eval_pr_auc": 0.6884837475836854,
+ "eval_precision": 0.7102177554438861,
+ "eval_precision_macro": 0.8257076908850431,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8157191031066195,
+ "eval_runtime": 0.1788,
+ "eval_samples_per_second": 911.642,
+ "eval_steps_per_second": 5.593,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7644
+ },
+ {
+ "epoch": 295.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9314417858263554,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696337658071933,
+ "eval_f1_macro": 0.8203618088536944,
+ "eval_loss": 0.26788315176963806,
+ "eval_pr_auc": 0.6885944465147925,
+ "eval_precision": 0.7095046854082999,
+ "eval_precision_macro": 0.8253458678840061,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156285616688215,
+ "eval_runtime": 0.2717,
+ "eval_samples_per_second": 599.832,
+ "eval_steps_per_second": 3.68,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7670
+ },
+ {
+ "epoch": 296.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9314500389854711,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696636587366694,
+ "eval_f1_macro": 0.8205062543343502,
+ "eval_loss": 0.2678987681865692,
+ "eval_pr_auc": 0.6885305487751676,
+ "eval_precision": 0.7090848363393454,
+ "eval_precision_macro": 0.8252153220919469,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.816021735161691,
+ "eval_runtime": 0.2551,
+ "eval_samples_per_second": 639.002,
+ "eval_steps_per_second": 3.92,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7696
+ },
+ {
+ "epoch": 297.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.931550575699698,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965959546127282,
+ "eval_f1_macro": 0.8205577878611678,
+ "eval_loss": 0.2679198086261749,
+ "eval_pr_auc": 0.6889351423284887,
+ "eval_precision": 0.710738255033557,
+ "eval_precision_macro": 0.8259168264621285,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.8154871679239726,
+ "eval_runtime": 0.2409,
+ "eval_samples_per_second": 676.584,
+ "eval_steps_per_second": 4.151,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7722
+ },
+ {
+ "epoch": 298.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9316071409836368,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6964256300444738,
+ "eval_f1_macro": 0.82051102635547,
+ "eval_loss": 0.2680239677429199,
+ "eval_pr_auc": 0.6888956424322248,
+ "eval_precision": 0.7117845117845117,
+ "eval_precision_macro": 0.8263378182350514,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8150232975586785,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.263,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7748
+ },
+ {
+ "epoch": 299.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9315966104197652,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6972235912600624,
+ "eval_f1_macro": 0.8208966763783243,
+ "eval_loss": 0.26803261041641235,
+ "eval_pr_auc": 0.6887732742755047,
+ "eval_precision": 0.7106496985934361,
+ "eval_precision_macro": 0.8259818448607991,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8160717602063305,
+ "eval_runtime": 0.2484,
+ "eval_samples_per_second": 656.12,
+ "eval_steps_per_second": 4.025,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7774
+ },
+ {
+ "epoch": 300.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9315960653998236,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966218432272876,
+ "eval_f1_macro": 0.8204813289787078,
+ "eval_loss": 0.26792290806770325,
+ "eval_pr_auc": 0.6890570542847262,
+ "eval_precision": 0.7087087087087087,
+ "eval_precision_macro": 0.8250519729735134,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8161226125133815,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.694,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7800
+ },
+ {
+ "epoch": 301.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9316357350655727,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966957093539372,
+ "eval_f1_macro": 0.8206059967994839,
+ "eval_loss": 0.26809555292129517,
+ "eval_pr_auc": 0.6889378655811479,
+ "eval_precision": 0.710596914822267,
+ "eval_precision_macro": 0.8258725914156881,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815618225754929,
+ "eval_runtime": 0.2178,
+ "eval_samples_per_second": 748.303,
+ "eval_steps_per_second": 4.591,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7826
+ },
+ {
+ "epoch": 302.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9317000084886855,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965811965811965,
+ "eval_f1_macro": 0.8205328694321837,
+ "eval_loss": 0.26798245310783386,
+ "eval_pr_auc": 0.6894273728032447,
+ "eval_precision": 0.7103586992960107,
+ "eval_precision_macro": 0.8257517200405735,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815588045275663,
+ "eval_runtime": 0.2561,
+ "eval_samples_per_second": 636.477,
+ "eval_steps_per_second": 3.905,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7852
+ },
+ {
+ "epoch": 303.0,
+ "eval_accuracy": 0.9060402684563759,
+ "eval_auc": 0.9316820812256066,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966513460275772,
+ "eval_f1_macro": 0.8205311837826491,
+ "eval_loss": 0.26780617237091064,
+ "eval_pr_auc": 0.6897785082602487,
+ "eval_precision": 0.7094617184887997,
+ "eval_precision_macro": 0.8253790573615671,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159208578100005,
+ "eval_runtime": 0.1902,
+ "eval_samples_per_second": 856.826,
+ "eval_steps_per_second": 5.257,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7878
+ },
+ {
+ "epoch": 304.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9317363691047894,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.26783081889152527,
+ "eval_pr_auc": 0.690108350201664,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.745,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7904
+ },
+ {
+ "epoch": 305.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318277475374976,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6962670613385956,
+ "eval_f1_macro": 0.8203632705580364,
+ "eval_loss": 0.267810195684433,
+ "eval_pr_auc": 0.6906198234983021,
+ "eval_precision": 0.7104026845637584,
+ "eval_precision_macro": 0.82571907957814,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.8152957491344841,
+ "eval_runtime": 0.2457,
+ "eval_samples_per_second": 663.513,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7930
+ },
+ {
+ "epoch": 306.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318495094051658,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965664530967636,
+ "eval_f1_macro": 0.8205079551116469,
+ "eval_loss": 0.26770758628845215,
+ "eval_pr_auc": 0.6907229577841941,
+ "eval_precision": 0.709979906229069,
+ "eval_precision_macro": 0.8255870038278783,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156889226273535,
+ "eval_runtime": 0.2472,
+ "eval_samples_per_second": 659.459,
+ "eval_steps_per_second": 4.046,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7956
+ },
+ {
+ "epoch": 307.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9318431735483448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6973381531383503,
+ "eval_f1_macro": 0.8209698284488745,
+ "eval_loss": 0.26784345507621765,
+ "eval_pr_auc": 0.6903028236900399,
+ "eval_precision": 0.7108877721943049,
+ "eval_precision_macro": 0.8261026405178202,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8161019406855965,
+ "eval_runtime": 0.2339,
+ "eval_samples_per_second": 696.845,
+ "eval_steps_per_second": 4.275,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7982
+ },
+ {
+ "epoch": 307.6923076923077,
+ "grad_norm": 18753.48046875,
+ "learning_rate": 3.8700127731844033e-07,
+ "loss": 0.2071,
+ "step": 8000
+ },
+ {
+ "epoch": 308.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318915635331595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966661192314009,
+ "eval_f1_macro": 0.8205561173351938,
+ "eval_loss": 0.2679731547832489,
+ "eval_pr_auc": 0.6902808443840289,
+ "eval_precision": 0.7098393574297188,
+ "eval_precision_macro": 0.8255431799139001,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.81581998045831,
+ "eval_runtime": 0.2227,
+ "eval_samples_per_second": 731.952,
+ "eval_steps_per_second": 4.491,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8008
+ },
+ {
+ "epoch": 309.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9319105808361218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975217462662071,
+ "eval_f1_macro": 0.821040751603759,
+ "eval_loss": 0.2678290605545044,
+ "eval_pr_auc": 0.6904266123808676,
+ "eval_precision": 0.7102272727272727,
+ "eval_precision_macro": 0.8258500239865676,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8164649336992,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.086,
+ "eval_steps_per_second": 4.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8034
+ },
+ {
+ "epoch": 310.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9319563819762139,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6977201902575036,
+ "eval_f1_macro": 0.821136637744354,
+ "eval_loss": 0.2677942216396332,
+ "eval_pr_auc": 0.6905770539125178,
+ "eval_precision": 0.7099465954606141,
+ "eval_precision_macro": 0.8257626451391362,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6859077716865527,
+ "eval_recall_macro": 0.816727049361113,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 672.991,
+ "eval_steps_per_second": 4.129,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8060
+ },
+ {
+ "epoch": 311.0,
+ "eval_accuracy": 0.9064470205409803,
+ "eval_auc": 0.9320261250637406,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975673898750822,
+ "eval_f1_macro": 0.8211161862162611,
+ "eval_loss": 0.2678627669811249,
+ "eval_pr_auc": 0.6908896136415948,
+ "eval_precision": 0.7113643982567884,
+ "eval_precision_macro": 0.8263444706297427,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8161623016441286,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.505,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8086
+ },
+ {
+ "epoch": 312.0,
+ "eval_accuracy": 0.9060402684563759,
+ "eval_auc": 0.9320056478859348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696551724137931,
+ "eval_f1_macro": 0.8204830448879512,
+ "eval_loss": 0.2678248882293701,
+ "eval_pr_auc": 0.6908301522653787,
+ "eval_precision": 0.7096018735362998,
+ "eval_precision_macro": 0.8254226766806146,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815789799979044,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.086,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8112
+ },
+ {
+ "epoch": 313.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9319988935316585,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965370096832431,
+ "eval_f1_macro": 0.8204581387495119,
+ "eval_loss": 0.2678254544734955,
+ "eval_pr_auc": 0.6908227024589884,
+ "eval_precision": 0.7092245989304813,
+ "eval_precision_macro": 0.8252587374599636,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8158906773307344,
+ "eval_runtime": 0.2589,
+ "eval_samples_per_second": 629.545,
+ "eval_steps_per_second": 3.862,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8138
+ },
+ {
+ "epoch": 314.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9320198184044164,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971634694212165,
+ "eval_f1_macro": 0.8207964351950081,
+ "eval_loss": 0.2678711414337158,
+ "eval_pr_auc": 0.6909501280556708,
+ "eval_precision": 0.7091394262841895,
+ "eval_precision_macro": 0.8253255619723288,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6855852950661077,
+ "eval_recall_macro": 0.8164752696130925,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.201,
+ "eval_steps_per_second": 3.768,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8164
+ },
+ {
+ "epoch": 315.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9320753325784678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.2678229808807373,
+ "eval_pr_auc": 0.6914105766155315,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.375,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8190
+ },
+ {
+ "epoch": 316.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9321298151076297,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967953985209532,
+ "eval_f1_macro": 0.8206541727499956,
+ "eval_loss": 0.26796844601631165,
+ "eval_pr_auc": 0.6912288183485439,
+ "eval_precision": 0.710455764075067,
+ "eval_precision_macro": 0.8258284574391159,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8157492835858855,
+ "eval_runtime": 0.2237,
+ "eval_samples_per_second": 728.567,
+ "eval_steps_per_second": 4.47,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8216
+ },
+ {
+ "epoch": 317.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9321722877330785,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6956235603817045,
+ "eval_f1_macro": 0.8199989231339035,
+ "eval_loss": 0.2681267261505127,
+ "eval_pr_auc": 0.6912998271961284,
+ "eval_precision": 0.7101108498488411,
+ "eval_precision_macro": 0.8254885924997606,
+ "eval_pred_class_0": 16691,
+ "eval_pred_class_1": 2977,
+ "eval_predicted_binding_ratio": 0.15136261948342486,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8148120342038165,
+ "eval_runtime": 0.1877,
+ "eval_samples_per_second": 868.412,
+ "eval_steps_per_second": 5.328,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8242
+ },
+ {
+ "epoch": 318.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9321751880177678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965664530967636,
+ "eval_f1_macro": 0.8205079551116469,
+ "eval_loss": 0.26805615425109863,
+ "eval_pr_auc": 0.6913470747613989,
+ "eval_precision": 0.709979906229069,
+ "eval_precision_macro": 0.8255870038278783,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156889226273535,
+ "eval_runtime": 0.2426,
+ "eval_samples_per_second": 671.932,
+ "eval_steps_per_second": 4.122,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8268
+ },
+ {
+ "epoch": 319.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9321843949617812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6961380443714051,
+ "eval_f1_macro": 0.8202653471082613,
+ "eval_loss": 0.267932653427124,
+ "eval_pr_auc": 0.6916487257323465,
+ "eval_precision": 0.7097855227882037,
+ "eval_precision_macro": 0.8254333991308556,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.8153664460069084,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.38,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8294
+ },
+ {
+ "epoch": 320.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.932188093311385,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6964373666064686,
+ "eval_f1_macro": 0.8204099902666875,
+ "eval_loss": 0.2679634094238281,
+ "eval_pr_auc": 0.6915655068510385,
+ "eval_precision": 0.7093645484949833,
+ "eval_precision_macro": 0.8253022526621696,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815759619499778,
+ "eval_runtime": 0.2483,
+ "eval_samples_per_second": 656.448,
+ "eval_steps_per_second": 4.027,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8320
+ },
+ {
+ "epoch": 321.0,
+ "eval_accuracy": 0.9058877364246491,
+ "eval_auc": 0.9321906237611137,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965076242006887,
+ "eval_f1_macro": 0.8204083386821658,
+ "eval_loss": 0.26790735125541687,
+ "eval_pr_auc": 0.6917736045731879,
+ "eval_precision": 0.7084723148765844,
+ "eval_precision_macro": 0.8249320182661266,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8160924320341154,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.581,
+ "eval_steps_per_second": 3.856,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8346
+ },
+ {
+ "epoch": 322.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.932275423024527,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6955950032873109,
+ "eval_f1_macro": 0.8199495526481063,
+ "eval_loss": 0.2680682837963104,
+ "eval_pr_auc": 0.6919499023976591,
+ "eval_precision": 0.709353000335233,
+ "eval_precision_macro": 0.8251589694514043,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8150137889071974,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.703,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8372
+ },
+ {
+ "epoch": 323.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9322442887603632,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965370096832431,
+ "eval_f1_macro": 0.8204581387495119,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6918309607756195,
+ "eval_precision": 0.7092245989304813,
+ "eval_precision_macro": 0.8252587374599636,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8158906773307344,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.368,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8398
+ },
+ {
+ "epoch": 324.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9322356073712934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6960945191992124,
+ "eval_f1_macro": 0.8201909332654507,
+ "eval_loss": 0.2680003046989441,
+ "eval_pr_auc": 0.6918708088537314,
+ "eval_precision": 0.7086535248914133,
+ "eval_precision_macro": 0.8249414550993799,
+ "eval_pred_class_0": 16675,
+ "eval_pred_class_1": 2993,
+ "eval_predicted_binding_ratio": 0.15217612365263372,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.81566907806198,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.473,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8424
+ },
+ {
+ "epoch": 325.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9322670238779269,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6961235216819974,
+ "eval_f1_macro": 0.8202405385118361,
+ "eval_loss": 0.26794886589050293,
+ "eval_pr_auc": 0.6921960622354616,
+ "eval_precision": 0.7094074322062269,
+ "eval_precision_macro": 0.8252690299332196,
+ "eval_pred_class_0": 16681,
+ "eval_pred_class_1": 2987,
+ "eval_predicted_binding_ratio": 0.1518710595891804,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815467323358599,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.5,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8450
+ },
+ {
+ "epoch": 326.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9322930096501425,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.2680268883705139,
+ "eval_pr_auc": 0.6921477872574622,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.1733,
+ "eval_samples_per_second": 940.367,
+ "eval_steps_per_second": 5.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8476
+ },
+ {
+ "epoch": 326.9230769230769,
+ "grad_norm": 17241.076171875,
+ "learning_rate": 3.2282309449959705e-07,
+ "loss": 0.2047,
+ "step": 8500
+ },
+ {
+ "epoch": 327.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9323445529646195,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967657199146281,
+ "eval_f1_macro": 0.820604246632002,
+ "eval_loss": 0.26804205775260925,
+ "eval_pr_auc": 0.6924439463998024,
+ "eval_precision": 0.7096989966555184,
+ "eval_precision_macro": 0.8254994563562998,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159510382892665,
+ "eval_runtime": 0.1771,
+ "eval_samples_per_second": 920.47,
+ "eval_steps_per_second": 5.647,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8502
+ },
+ {
+ "epoch": 328.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9323743441439272,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967657199146281,
+ "eval_f1_macro": 0.820604246632002,
+ "eval_loss": 0.26802849769592285,
+ "eval_pr_auc": 0.6925977669253861,
+ "eval_precision": 0.7096989966555184,
+ "eval_precision_macro": 0.8254994563562998,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159510382892665,
+ "eval_runtime": 0.1678,
+ "eval_samples_per_second": 971.421,
+ "eval_steps_per_second": 5.96,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8528
+ },
+ {
+ "epoch": 329.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9323637649175607,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983295119554537,
+ "eval_f1_macro": 0.8214488366277419,
+ "eval_loss": 0.2680566608905792,
+ "eval_pr_auc": 0.6924500045026715,
+ "eval_precision": 0.7094841930116472,
+ "eval_precision_macro": 0.825665699698526,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8174125189951615,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.54,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8554
+ },
+ {
+ "epoch": 330.0,
+ "eval_accuracy": 0.906243644498678,
+ "eval_auc": 0.9324242037360844,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6974072858549393,
+ "eval_f1_macro": 0.8209676462967013,
+ "eval_loss": 0.26804018020629883,
+ "eval_pr_auc": 0.6928556723686659,
+ "eval_precision": 0.7099899766120948,
+ "eval_precision_macro": 0.8257296209897056,
+ "eval_pred_class_0": 16675,
+ "eval_pred_class_1": 2993,
+ "eval_predicted_binding_ratio": 0.15217612365263372,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8164347532199341,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.585,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8580
+ },
+ {
+ "epoch": 331.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9323838917254041,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6981162981162982,
+ "eval_f1_macro": 0.8213280175544326,
+ "eval_loss": 0.2678423821926117,
+ "eval_pr_auc": 0.6929480254197629,
+ "eval_precision": 0.7093874833555259,
+ "eval_precision_macro": 0.8255890849326837,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.817251280684939,
+ "eval_runtime": 0.2474,
+ "eval_samples_per_second": 658.814,
+ "eval_steps_per_second": 4.042,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8606
+ },
+ {
+ "epoch": 332.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9323530591687079,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993784756297023,
+ "eval_f1_macro": 0.8220268454242666,
+ "eval_loss": 0.26779934763908386,
+ "eval_pr_auc": 0.6928104051729911,
+ "eval_precision": 0.7095917690009956,
+ "eval_precision_macro": 0.8258856473344816,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183195878979646,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.815,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8632
+ },
+ {
+ "epoch": 333.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.932432009200248,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983450761920367,
+ "eval_f1_macro": 0.8214741659959972,
+ "eval_loss": 0.2679860591888428,
+ "eval_pr_auc": 0.6929996618872077,
+ "eval_precision": 0.7098600932711525,
+ "eval_precision_macro": 0.8258288825890143,
+ "eval_pred_class_0": 16666,
+ "eval_pred_class_1": 3002,
+ "eval_predicted_binding_ratio": 0.1526337197478137,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.817311641643471,
+ "eval_runtime": 0.1802,
+ "eval_samples_per_second": 904.69,
+ "eval_steps_per_second": 5.55,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8658
+ },
+ {
+ "epoch": 334.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9325279911049631,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975521603417119,
+ "eval_f1_macro": 0.8210910370116632,
+ "eval_loss": 0.26808273792266846,
+ "eval_pr_auc": 0.6933573725062909,
+ "eval_precision": 0.7109845947756196,
+ "eval_precision_macro": 0.8261792653772595,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.816263178995819,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.721,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8684
+ },
+ {
+ "epoch": 335.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.932545276023111,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6976362442547603,
+ "eval_f1_macro": 0.8211138747443938,
+ "eval_loss": 0.26816996932029724,
+ "eval_pr_auc": 0.6932853419412803,
+ "eval_precision": 0.710464727515881,
+ "eval_precision_macro": 0.8259705061096825,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.816495114178466,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.649,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8710
+ },
+ {
+ "epoch": 336.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9325305604846879,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982772764561116,
+ "eval_f1_macro": 0.821477015533191,
+ "eval_loss": 0.2680180072784424,
+ "eval_pr_auc": 0.6935243427052671,
+ "eval_precision": 0.7107548430193721,
+ "eval_precision_macro": 0.8262002594609874,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169788291091336,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.515,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8736
+ },
+ {
+ "epoch": 337.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.932509878924404,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.698032786885246,
+ "eval_f1_macro": 0.8213054775682699,
+ "eval_loss": 0.2678954601287842,
+ "eval_pr_auc": 0.6935428752602653,
+ "eval_precision": 0.7099033011003668,
+ "eval_precision_macro": 0.8257957323787274,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.817019345502292,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.491,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8762
+ },
+ {
+ "epoch": 338.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9325608674864403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989176779271893,
+ "eval_f1_macro": 0.8218398486513014,
+ "eval_loss": 0.26806220412254333,
+ "eval_pr_auc": 0.6935170525264562,
+ "eval_precision": 0.7110443777110443,
+ "eval_precision_macro": 0.8264297528888735,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.8174625440398011,
+ "eval_runtime": 0.1767,
+ "eval_samples_per_second": 922.3,
+ "eval_steps_per_second": 5.658,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8788
+ },
+ {
+ "epoch": 339.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9325807801793065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990163934426229,
+ "eval_f1_macro": 0.8218875444165816,
+ "eval_loss": 0.26803991198539734,
+ "eval_pr_auc": 0.6936355510877774,
+ "eval_precision": 0.7109036345448483,
+ "eval_precision_macro": 0.8263858865027319,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8175936018707576,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.235,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8814
+ },
+ {
+ "epoch": 340.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9326051308916972,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983762506150566,
+ "eval_f1_macro": 0.8215248382050282,
+ "eval_loss": 0.26792144775390625,
+ "eval_pr_auc": 0.6939264625162457,
+ "eval_precision": 0.7106141522029372,
+ "eval_precision_macro": 0.8261564043164398,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.8171098869400901,
+ "eval_runtime": 0.2744,
+ "eval_samples_per_second": 593.925,
+ "eval_steps_per_second": 3.644,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8840
+ },
+ {
+ "epoch": 341.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9325905710732574,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6980482204362801,
+ "eval_f1_macro": 0.821330737974691,
+ "eval_loss": 0.26793381571769714,
+ "eval_pr_auc": 0.6938155556406347,
+ "eval_precision": 0.7102803738317757,
+ "eval_precision_macro": 0.825959524727788,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169184681506015,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.704,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8866
+ },
+ {
+ "epoch": 342.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9325911160931989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997542997542997,
+ "eval_f1_macro": 0.8222974802915219,
+ "eval_loss": 0.26800957322120667,
+ "eval_pr_auc": 0.6936597261010234,
+ "eval_precision": 0.711051930758988,
+ "eval_precision_macro": 0.8265713326382553,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8182083746323816,
+ "eval_runtime": 0.1766,
+ "eval_samples_per_second": 923.118,
+ "eval_steps_per_second": 5.663,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8892
+ },
+ {
+ "epoch": 343.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9326288587241547,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982772764561116,
+ "eval_f1_macro": 0.821477015533191,
+ "eval_loss": 0.2680290937423706,
+ "eval_pr_auc": 0.6938544134850919,
+ "eval_precision": 0.7107548430193721,
+ "eval_precision_macro": 0.8262002594609874,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169788291091336,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.353,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8918
+ },
+ {
+ "epoch": 344.0,
+ "eval_accuracy": 0.9064470205409803,
+ "eval_auc": 0.9326493359019604,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982617251557888,
+ "eval_f1_macro": 0.8214517001734176,
+ "eval_loss": 0.2680736482143402,
+ "eval_pr_auc": 0.6938105747317047,
+ "eval_precision": 0.710377043710377,
+ "eval_precision_macro": 0.8260361014844849,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.8170797064608241,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.605,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8944
+ },
+ {
+ "epoch": 345.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9326923535473509,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6977507798391068,
+ "eval_f1_macro": 0.8211870157279457,
+ "eval_loss": 0.2681424021720886,
+ "eval_pr_auc": 0.6938928037843046,
+ "eval_precision": 0.7107023411371237,
+ "eval_precision_macro": 0.8260910674386901,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.816525294657732,
+ "eval_runtime": 0.2453,
+ "eval_samples_per_second": 664.523,
+ "eval_steps_per_second": 4.077,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8970
+ },
+ {
+ "epoch": 346.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9327347288478101,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6979799638692724,
+ "eval_f1_macro": 0.821333351261192,
+ "eval_loss": 0.2682046592235565,
+ "eval_pr_auc": 0.6939995353512864,
+ "eval_precision": 0.7111780455153949,
+ "eval_precision_macro": 0.8263324280334768,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8165856556162641,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.642,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8996
+ },
+ {
+ "epoch": 346.15384615384613,
+ "grad_norm": 18666.783203125,
+ "learning_rate": 2.618336781094791e-07,
+ "loss": 0.2031,
+ "step": 9000
+ },
+ {
+ "epoch": 347.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9326942611171465,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983139630054018,
+ "eval_f1_macro": 0.8214235117341392,
+ "eval_loss": 0.2681069076061249,
+ "eval_pr_auc": 0.6938781737586003,
+ "eval_precision": 0.7091090425531915,
+ "eval_precision_macro": 0.8255029006283365,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6878426314092229,
+ "eval_recall_macro": 0.817513396346852,
+ "eval_runtime": 0.1933,
+ "eval_samples_per_second": 843.456,
+ "eval_steps_per_second": 5.175,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9022
+ },
+ {
+ "epoch": 348.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9326940859321652,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.26795056462287903,
+ "eval_pr_auc": 0.6940722215736992,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.287,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9048
+ },
+ {
+ "epoch": 349.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9327572303853989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991310050828005,
+ "eval_f1_macro": 0.8219607247335354,
+ "eval_loss": 0.267974317073822,
+ "eval_pr_auc": 0.6943572217621294,
+ "eval_precision": 0.7111407605070047,
+ "eval_precision_macro": 0.8265061930909349,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8176237823500236,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.819,
+ "eval_steps_per_second": 3.79,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9074
+ },
+ {
+ "epoch": 350.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9327859996523161,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6985064828491712,
+ "eval_f1_macro": 0.8216233644580062,
+ "eval_loss": 0.2680947780609131,
+ "eval_pr_auc": 0.6943020020881897,
+ "eval_precision": 0.7112299465240641,
+ "eval_precision_macro": 0.8264413105131714,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8170391900676656,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.86,
+ "eval_steps_per_second": 4.153,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9100
+ },
+ {
+ "epoch": 351.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9328183504788495,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6986211424819435,
+ "eval_f1_macro": 0.8216965657061384,
+ "eval_loss": 0.2681223750114441,
+ "eval_pr_auc": 0.694450730809475,
+ "eval_precision": 0.7114677365429622,
+ "eval_precision_macro": 0.8265619548577976,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8170693705469316,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.919,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9126
+ },
+ {
+ "epoch": 352.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9328099415997506,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991310050828005,
+ "eval_f1_macro": 0.8219607247335354,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6945293388795055,
+ "eval_precision": 0.7111407605070047,
+ "eval_precision_macro": 0.8265061930909349,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8176237823500236,
+ "eval_runtime": 0.1729,
+ "eval_samples_per_second": 942.612,
+ "eval_steps_per_second": 5.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9152
+ },
+ {
+ "epoch": 353.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9328013186056745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000819000819001,
+ "eval_f1_macro": 0.8224913728389398,
+ "eval_loss": 0.26793336868286133,
+ "eval_pr_auc": 0.6945633927009858,
+ "eval_precision": 0.7113848202396804,
+ "eval_precision_macro": 0.8267677821793697,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8183997934218701,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.431,
+ "eval_steps_per_second": 3.917,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9178
+ },
+ {
+ "epoch": 354.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.932783060437631,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26802781224250793,
+ "eval_pr_auc": 0.694307607889245,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.039,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9204
+ },
+ {
+ "epoch": 355.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9327856687473517,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997875469848014,
+ "eval_f1_macro": 0.8222422697443199,
+ "eval_loss": 0.2679577171802521,
+ "eval_pr_auc": 0.6944152708065687,
+ "eval_precision": 0.7094102054340623,
+ "eval_precision_macro": 0.8258762738882024,
+ "eval_pred_class_0": 16650,
+ "eval_pred_class_1": 3018,
+ "eval_predicted_binding_ratio": 0.15344722391702256,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8187429418701,
+ "eval_runtime": 0.1868,
+ "eval_samples_per_second": 872.379,
+ "eval_steps_per_second": 5.352,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9230
+ },
+ {
+ "epoch": 356.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9328543023299973,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26801130175590515,
+ "eval_pr_auc": 0.6946173571124205,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1679,
+ "eval_samples_per_second": 971.014,
+ "eval_steps_per_second": 5.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9256
+ },
+ {
+ "epoch": 357.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9328912663610364,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002947920078612,
+ "eval_f1_macro": 0.8226120363891247,
+ "eval_loss": 0.26803824305534363,
+ "eval_pr_auc": 0.6948368351435844,
+ "eval_precision": 0.7114808652246256,
+ "eval_precision_macro": 0.8268440754137292,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8185610317320926,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.12,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9282
+ },
+ {
+ "epoch": 358.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9328766286826047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003929273084479,
+ "eval_f1_macro": 0.8226594466805872,
+ "eval_loss": 0.2680445909500122,
+ "eval_pr_auc": 0.6947543185413181,
+ "eval_precision": 0.711340206185567,
+ "eval_precision_macro": 0.8268002873554328,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186920895630492,
+ "eval_runtime": 0.2229,
+ "eval_samples_per_second": 731.38,
+ "eval_steps_per_second": 4.487,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9308
+ },
+ {
+ "epoch": 359.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9328791980623294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002617801047121,
+ "eval_f1_macro": 0.8225604590386311,
+ "eval_loss": 0.2679673135280609,
+ "eval_pr_auc": 0.694877850403851,
+ "eval_precision": 0.7107273331119229,
+ "eval_precision_macro": 0.8265169354519211,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187627864354736,
+ "eval_runtime": 0.1766,
+ "eval_samples_per_second": 922.941,
+ "eval_steps_per_second": 5.662,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9334
+ },
+ {
+ "epoch": 360.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9328811640271188,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2680214047431946,
+ "eval_pr_auc": 0.6947522638420174,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.2235,
+ "eval_samples_per_second": 729.222,
+ "eval_steps_per_second": 4.474,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9360
+ },
+ {
+ "epoch": 361.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.932900882070006,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000163478829492,
+ "eval_f1_macro": 0.8223884382480462,
+ "eval_loss": 0.26796379685401917,
+ "eval_pr_auc": 0.6949176089109205,
+ "eval_precision": 0.7098806366047745,
+ "eval_precision_macro": 0.8261149519800236,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188033028286321,
+ "eval_runtime": 0.1694,
+ "eval_samples_per_second": 962.09,
+ "eval_steps_per_second": 5.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9386
+ },
+ {
+ "epoch": 362.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9329447561753046,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005723630417008,
+ "eval_f1_macro": 0.8227283114988764,
+ "eval_loss": 0.26795387268066406,
+ "eval_pr_auc": 0.6950891938157696,
+ "eval_precision": 0.7106834771068348,
+ "eval_precision_macro": 0.8265498567232265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190550825766527,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.968,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9412
+ },
+ {
+ "epoch": 363.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9329731556072616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001636661211129,
+ "eval_f1_macro": 0.8225130616165066,
+ "eval_loss": 0.26798126101493835,
+ "eval_pr_auc": 0.6952376733836106,
+ "eval_precision": 0.7108673978065803,
+ "eval_precision_macro": 0.8265604171937038,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818631728604517,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.549,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9438
+ },
+ {
+ "epoch": 364.0,
+ "eval_accuracy": 0.9070063046573114,
+ "eval_auc": 0.9329669754704237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008994276369583,
+ "eval_f1_macro": 0.8229219452383643,
+ "eval_loss": 0.26792433857917786,
+ "eval_pr_auc": 0.6952423959524336,
+ "eval_precision": 0.7110152621101526,
+ "eval_precision_macro": 0.8267457720422265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8192465013661412,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.085,
+ "eval_steps_per_second": 3.847,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9464
+ },
+ {
+ "epoch": 365.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9329833552661686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003598298985934,
+ "eval_f1_macro": 0.8226078241660808,
+ "eval_loss": 0.2679993808269501,
+ "eval_pr_auc": 0.6952272078548911,
+ "eval_precision": 0.7105874543644208,
+ "eval_precision_macro": 0.8264735530603251,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188938442664302,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.317,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9490
+ },
+ {
+ "epoch": 365.38461538461536,
+ "grad_norm": 18768.416015625,
+ "learning_rate": 2.0513069380006943e-07,
+ "loss": 0.2014,
+ "step": 9500
+ },
+ {
+ "epoch": 366.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330138179879044,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003763704794632,
+ "eval_f1_macro": 0.822633632944773,
+ "eval_loss": 0.26798829436302185,
+ "eval_pr_auc": 0.6953770669982303,
+ "eval_precision": 0.7109634551495017,
+ "eval_precision_macro": 0.8266367281750631,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187929669147396,
+ "eval_runtime": 0.198,
+ "eval_samples_per_second": 823.426,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9516
+ },
+ {
+ "epoch": 367.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9330281442263691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005890052356021,
+ "eval_f1_macro": 0.8227541703278901,
+ "eval_loss": 0.2679848372936249,
+ "eval_pr_auc": 0.6954512195878219,
+ "eval_precision": 0.7110594486881435,
+ "eval_precision_macro": 0.8267130106501293,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189542052249621,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.184,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9542
+ },
+ {
+ "epoch": 368.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330502564639999,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2679852545261383,
+ "eval_pr_auc": 0.6955032656447977,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.651,
+ "eval_steps_per_second": 3.887,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9568
+ },
+ {
+ "epoch": 369.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330669768972081,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2680181562900543,
+ "eval_pr_auc": 0.6955153529738297,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2378,
+ "eval_samples_per_second": 685.421,
+ "eval_steps_per_second": 4.205,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9594
+ },
+ {
+ "epoch": 370.0,
+ "eval_accuracy": 0.9070063046573114,
+ "eval_auc": 0.9330907631246593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008015704236872,
+ "eval_f1_macro": 0.8228746737830142,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6956635287744017,
+ "eval_precision": 0.7111553784860558,
+ "eval_precision_macro": 0.8267892646512892,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8191154435351846,
+ "eval_runtime": 0.1746,
+ "eval_samples_per_second": 933.529,
+ "eval_steps_per_second": 5.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9620
+ },
+ {
+ "epoch": 371.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9331252940309591,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999672453324599,
+ "eval_f1_macro": 0.8224181697622275,
+ "eval_loss": 0.26801252365112305,
+ "eval_pr_auc": 0.6958269523241747,
+ "eval_precision": 0.7111480865224625,
+ "eval_precision_macro": 0.8266476794611952,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8183696129426041,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.791,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9646
+ },
+ {
+ "epoch": 372.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9331262672808548,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005723630417008,
+ "eval_f1_macro": 0.8227283114988764,
+ "eval_loss": 0.26803725957870483,
+ "eval_pr_auc": 0.6957878915651574,
+ "eval_precision": 0.7106834771068348,
+ "eval_precision_macro": 0.8265498567232265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190550825766527,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.266,
+ "eval_steps_per_second": 3.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9672
+ },
+ {
+ "epoch": 373.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9331573528825239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26807889342308044,
+ "eval_pr_auc": 0.6959044832644976,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2659,
+ "eval_samples_per_second": 613.113,
+ "eval_steps_per_second": 3.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9698
+ },
+ {
+ "epoch": 374.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9331759224905339,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6996235063021771,
+ "eval_f1_macro": 0.8221986674075668,
+ "eval_loss": 0.2681734561920166,
+ "eval_pr_auc": 0.695850524773773,
+ "eval_precision": 0.710438829787234,
+ "eval_precision_macro": 0.8262878422645654,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.818279071504806,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.284,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9724
+ },
+ {
+ "epoch": 375.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933143698186487,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2681812345981598,
+ "eval_pr_auc": 0.6956723886102156,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.1783,
+ "eval_samples_per_second": 914.221,
+ "eval_steps_per_second": 5.609,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9750
+ },
+ {
+ "epoch": 376.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9331405935193198,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002123835974514,
+ "eval_f1_macro": 0.8224831299290885,
+ "eval_loss": 0.2681320309638977,
+ "eval_pr_auc": 0.6957066245846253,
+ "eval_precision": 0.7096026490066225,
+ "eval_precision_macro": 0.8260290996114323,
+ "eval_pred_class_0": 16648,
+ "eval_pred_class_1": 3020,
+ "eval_predicted_binding_ratio": 0.1535489119381737,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.819065418490545,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.75,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9776
+ },
+ {
+ "epoch": 377.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9331510656881976,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7010443864229765,
+ "eval_f1_macro": 0.8229384784439624,
+ "eval_loss": 0.2682173550128937,
+ "eval_pr_auc": 0.6955658216411763,
+ "eval_precision": 0.709613478691774,
+ "eval_precision_macro": 0.8261726428372638,
+ "eval_pred_class_0": 16641,
+ "eval_pred_class_1": 3027,
+ "eval_predicted_binding_ratio": 0.15390482001220257,
+ "eval_recall": 0.6926797807158981,
+ "eval_recall_macro": 0.8198112490831255,
+ "eval_runtime": 0.2446,
+ "eval_samples_per_second": 666.285,
+ "eval_steps_per_second": 4.088,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9802
+ },
+ {
+ "epoch": 378.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9331855381995037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994767822105952,
+ "eval_f1_macro": 0.8220743333087894,
+ "eval_loss": 0.2681441009044647,
+ "eval_pr_auc": 0.6959256270686769,
+ "eval_precision": 0.709452736318408,
+ "eval_precision_macro": 0.8258426835378145,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8184506457289211,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.04,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9828
+ },
+ {
+ "epoch": 379.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9331895674540718,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003267973856209,
+ "eval_f1_macro": 0.8225562214288413,
+ "eval_loss": 0.26809969544410706,
+ "eval_pr_auc": 0.6959934558189067,
+ "eval_precision": 0.7098376946008612,
+ "eval_precision_macro": 0.8261483505739005,
+ "eval_pred_class_0": 16649,
+ "eval_pred_class_1": 3019,
+ "eval_predicted_binding_ratio": 0.15349806792759813,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8190955989698111,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.005,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9854
+ },
+ {
+ "epoch": 380.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332175581210724,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994928840176673,
+ "eval_f1_macro": 0.8220999320609059,
+ "eval_loss": 0.26810789108276367,
+ "eval_pr_auc": 0.6960299382483204,
+ "eval_precision": 0.7098273572377158,
+ "eval_precision_macro": 0.8260051771779358,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183497683772306,
+ "eval_runtime": 0.1782,
+ "eval_samples_per_second": 914.547,
+ "eval_steps_per_second": 5.611,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9880
+ },
+ {
+ "epoch": 381.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332336167443518,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.2680502235889435,
+ "eval_pr_auc": 0.6962912618518755,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.169,
+ "eval_steps_per_second": 3.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9906
+ },
+ {
+ "epoch": 382.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332394951737218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26803064346313477,
+ "eval_pr_auc": 0.696271279785013,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.1775,
+ "eval_samples_per_second": 918.497,
+ "eval_steps_per_second": 5.635,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9932
+ },
+ {
+ "epoch": 383.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332396703587031,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2680654227733612,
+ "eval_pr_auc": 0.6962410474404584,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2478,
+ "eval_samples_per_second": 657.85,
+ "eval_steps_per_second": 4.036,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9958
+ },
+ {
+ "epoch": 384.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332586000691747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995090016366612,
+ "eval_f1_macro": 0.8221255355501671,
+ "eval_loss": 0.26804468035697937,
+ "eval_pr_auc": 0.6963737898708651,
+ "eval_precision": 0.7102027251578598,
+ "eval_precision_macro": 0.8261680532566416,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182488910255401,
+ "eval_runtime": 0.1642,
+ "eval_samples_per_second": 992.895,
+ "eval_steps_per_second": 6.091,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9984
+ },
+ {
+ "epoch": 384.61538461538464,
+ "grad_norm": 19506.416015625,
+ "learning_rate": 1.5373466155541264e-07,
+ "loss": 0.1999,
+ "step": 10000
+ },
+ {
+ "epoch": 385.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332885664334637,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990990990990991,
+ "eval_f1_macro": 0.8219096951966862,
+ "eval_loss": 0.26817384362220764,
+ "eval_pr_auc": 0.6963813810367415,
+ "eval_precision": 0.7103861517976032,
+ "eval_precision_macro": 0.8261784335560267,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.8178255370534045,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.846,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10010
+ },
+ {
+ "epoch": 386.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9332890919884074,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995251350908793,
+ "eval_f1_macro": 0.8221511437890823,
+ "eval_loss": 0.2681432366371155,
+ "eval_pr_auc": 0.6963686032935126,
+ "eval_precision": 0.7105788423153693,
+ "eval_precision_macro": 0.8263313128873689,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181480136738496,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 609.003,
+ "eval_steps_per_second": 3.736,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10036
+ },
+ {
+ "epoch": 387.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332816368942063,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.2681044936180115,
+ "eval_pr_auc": 0.6963340299375516,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2503,
+ "eval_samples_per_second": 651.154,
+ "eval_steps_per_second": 3.995,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10062
+ },
+ {
+ "epoch": 388.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332853547088078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26804089546203613,
+ "eval_pr_auc": 0.6964477494759991,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.349,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10088
+ },
+ {
+ "epoch": 389.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333026785569515,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.26805874705314636,
+ "eval_pr_auc": 0.6965416515768459,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.1895,
+ "eval_samples_per_second": 860.093,
+ "eval_steps_per_second": 5.277,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10114
+ },
+ {
+ "epoch": 390.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9332940944928713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26800334453582764,
+ "eval_pr_auc": 0.6965549091166009,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.7,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10140
+ },
+ {
+ "epoch": 391.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332915835081403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699591169255928,
+ "eval_f1_macro": 0.8221474102804127,
+ "eval_loss": 0.26801303029060364,
+ "eval_pr_auc": 0.69649915263674,
+ "eval_precision": 0.7096881220968813,
+ "eval_precision_macro": 0.8259621107662262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8184808262081871,
+ "eval_runtime": 0.2322,
+ "eval_samples_per_second": 702.068,
+ "eval_steps_per_second": 4.307,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10166
+ },
+ {
+ "epoch": 392.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9333394576705105,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680598199367523,
+ "eval_pr_auc": 0.6966844521188784,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.281,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10192
+ },
+ {
+ "epoch": 393.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333311363839021,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26812389492988586,
+ "eval_pr_auc": 0.696579843318821,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.459,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10218
+ },
+ {
+ "epoch": 394.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9333747574442278,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990990990990991,
+ "eval_f1_macro": 0.8219096951966862,
+ "eval_loss": 0.26815417408943176,
+ "eval_pr_auc": 0.6966758102563304,
+ "eval_precision": 0.7103861517976032,
+ "eval_precision_macro": 0.8261784335560267,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.8178255370534045,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.797,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10244
+ },
+ {
+ "epoch": 395.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.93337473797923,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26812419295310974,
+ "eval_pr_auc": 0.6967071460749926,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.2546,
+ "eval_samples_per_second": 640.225,
+ "eval_steps_per_second": 3.928,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10270
+ },
+ {
+ "epoch": 396.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333609762257046,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6966270582247817,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1654,
+ "eval_samples_per_second": 985.392,
+ "eval_steps_per_second": 6.045,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10296
+ },
+ {
+ "epoch": 397.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333962857319209,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.268099308013916,
+ "eval_pr_auc": 0.6968255966064625,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.1974,
+ "eval_samples_per_second": 825.595,
+ "eval_steps_per_second": 5.065,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10322
+ },
+ {
+ "epoch": 398.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9334254248337986,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6987714987714988,
+ "eval_f1_macro": 0.8217158026492684,
+ "eval_loss": 0.26809167861938477,
+ "eval_pr_auc": 0.6970114900505864,
+ "eval_precision": 0.7100532623169108,
+ "eval_precision_macro": 0.8259819840149124,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6878426314092229,
+ "eval_recall_macro": 0.8176341182639161,
+ "eval_runtime": 0.1847,
+ "eval_samples_per_second": 882.621,
+ "eval_steps_per_second": 5.415,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10348
+ },
+ {
+ "epoch": 399.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334154587548664,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994928840176673,
+ "eval_f1_macro": 0.8220999320609059,
+ "eval_loss": 0.26815035939216614,
+ "eval_pr_auc": 0.6968381832516852,
+ "eval_precision": 0.7098273572377158,
+ "eval_precision_macro": 0.8260051771779358,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183497683772306,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.281,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10374
+ },
+ {
+ "epoch": 400.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334343592678412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991976420501065,
+ "eval_f1_macro": 0.821957303073866,
+ "eval_loss": 0.26815110445022583,
+ "eval_pr_auc": 0.6969254623478301,
+ "eval_precision": 0.7102461743180306,
+ "eval_precision_macro": 0.826134970486347,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.817956594884361,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.319,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10400
+ },
+ {
+ "epoch": 401.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334508655860725,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.2681412398815155,
+ "eval_pr_auc": 0.6970213943546584,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2502,
+ "eval_samples_per_second": 651.598,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10426
+ },
+ {
+ "epoch": 402.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334578048578289,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6996235063021771,
+ "eval_f1_macro": 0.8221986674075668,
+ "eval_loss": 0.26816073060035706,
+ "eval_pr_auc": 0.697032431219364,
+ "eval_precision": 0.710438829787234,
+ "eval_precision_macro": 0.8262878422645654,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.818279071504806,
+ "eval_runtime": 0.2131,
+ "eval_samples_per_second": 764.767,
+ "eval_steps_per_second": 4.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10452
+ },
+ {
+ "epoch": 403.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334644910846125,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993945344460808,
+ "eval_f1_macro": 0.8220524214743572,
+ "eval_loss": 0.2681373655796051,
+ "eval_pr_auc": 0.6971117657586055,
+ "eval_precision": 0.7099667774086379,
+ "eval_precision_macro": 0.8260483424802825,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182187105462742,
+ "eval_runtime": 0.1917,
+ "eval_samples_per_second": 850.439,
+ "eval_steps_per_second": 5.217,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10478
+ },
+ {
+ "epoch": 403.84615384615387,
+ "grad_norm": 20065.328125,
+ "learning_rate": 1.0857058873879127e-07,
+ "loss": 0.1991,
+ "step": 10500
+ },
+ {
+ "epoch": 404.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9334760435608745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26816511154174805,
+ "eval_pr_auc": 0.6971952135976213,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.2491,
+ "eval_samples_per_second": 654.311,
+ "eval_steps_per_second": 4.014,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10504
+ },
+ {
+ "epoch": 405.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334837419675497,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.26816821098327637,
+ "eval_pr_auc": 0.6972179050703514,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.823,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10530
+ },
+ {
+ "epoch": 406.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334893478869489,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.2681950330734253,
+ "eval_pr_auc": 0.6972103120395237,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.636,
+ "eval_steps_per_second": 4.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10556
+ },
+ {
+ "epoch": 407.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334909050867821,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998363338788871,
+ "eval_f1_macro": 0.822319298583337,
+ "eval_loss": 0.2681744396686554,
+ "eval_pr_auc": 0.6972816477778223,
+ "eval_precision": 0.71053506148222,
+ "eval_precision_macro": 0.8263642352251727,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184403098150286,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.723,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10582
+ },
+ {
+ "epoch": 408.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.933494973271346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995090016366612,
+ "eval_f1_macro": 0.8221255355501671,
+ "eval_loss": 0.2681480348110199,
+ "eval_pr_auc": 0.697380910091478,
+ "eval_precision": 0.7102027251578598,
+ "eval_precision_macro": 0.8261680532566416,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182488910255401,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.704,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10608
+ },
+ {
+ "epoch": 409.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334896009319218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680969536304474,
+ "eval_pr_auc": 0.6974108729960042,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2261,
+ "eval_samples_per_second": 721.003,
+ "eval_steps_per_second": 4.423,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10634
+ },
+ {
+ "epoch": 410.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9334926569365942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26808932423591614,
+ "eval_pr_auc": 0.6974463532877748,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.923,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10660
+ },
+ {
+ "epoch": 411.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334912943867403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680690288543701,
+ "eval_pr_auc": 0.6974656777279113,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.181,
+ "eval_samples_per_second": 900.801,
+ "eval_steps_per_second": 5.526,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10686
+ },
+ {
+ "epoch": 412.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9334969197711376,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26803234219551086,
+ "eval_pr_auc": 0.6975509824558107,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1785,
+ "eval_samples_per_second": 913.403,
+ "eval_steps_per_second": 5.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10712
+ },
+ {
+ "epoch": 413.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9334973090710958,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26804181933403015,
+ "eval_pr_auc": 0.697539016898834,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1858,
+ "eval_samples_per_second": 877.163,
+ "eval_steps_per_second": 5.381,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10738
+ },
+ {
+ "epoch": 414.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335163069090602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998363338788871,
+ "eval_f1_macro": 0.822319298583337,
+ "eval_loss": 0.2680352032184601,
+ "eval_pr_auc": 0.697653341121327,
+ "eval_precision": 0.71053506148222,
+ "eval_precision_macro": 0.8263642352251727,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184403098150286,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 682.004,
+ "eval_steps_per_second": 4.184,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10764
+ },
+ {
+ "epoch": 415.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335094357647963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2680439054965973,
+ "eval_pr_auc": 0.6975754183405896,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.063,
+ "eval_steps_per_second": 3.927,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10790
+ },
+ {
+ "epoch": 416.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335170855089767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.26802244782447815,
+ "eval_pr_auc": 0.6976835126920541,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.1858,
+ "eval_samples_per_second": 877.122,
+ "eval_steps_per_second": 5.381,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10816
+ },
+ {
+ "epoch": 417.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335191877287514,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.26805901527404785,
+ "eval_pr_auc": 0.6976076287719296,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.253,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10842
+ },
+ {
+ "epoch": 418.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335267206829443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002617801047121,
+ "eval_f1_macro": 0.8225604590386311,
+ "eval_loss": 0.26806166768074036,
+ "eval_pr_auc": 0.6976721144908643,
+ "eval_precision": 0.7107273331119229,
+ "eval_precision_macro": 0.8265169354519211,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187627864354736,
+ "eval_runtime": 0.1992,
+ "eval_samples_per_second": 818.364,
+ "eval_steps_per_second": 5.021,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10868
+ },
+ {
+ "epoch": 419.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9335306331475249,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7006869479882237,
+ "eval_f1_macro": 0.8228014837466855,
+ "eval_loss": 0.2680812180042267,
+ "eval_pr_auc": 0.6976481882085733,
+ "eval_precision": 0.7109193494855626,
+ "eval_precision_macro": 0.8266695216356063,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190852630559187,
+ "eval_runtime": 0.2498,
+ "eval_samples_per_second": 652.627,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10894
+ },
+ {
+ "epoch": 420.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335429739562026,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26813384890556335,
+ "eval_pr_auc": 0.6976080200264206,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2232,
+ "eval_samples_per_second": 730.143,
+ "eval_steps_per_second": 4.479,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10920
+ },
+ {
+ "epoch": 421.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9335431686061818,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.2681698799133301,
+ "eval_pr_auc": 0.6975308484626278,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.1913,
+ "eval_samples_per_second": 852.153,
+ "eval_steps_per_second": 5.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10946
+ },
+ {
+ "epoch": 422.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335561517597906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681558430194855,
+ "eval_pr_auc": 0.6975926126749412,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.252,
+ "eval_samples_per_second": 646.818,
+ "eval_steps_per_second": 3.968,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10972
+ },
+ {
+ "epoch": 423.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9335701957557857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.2681851089000702,
+ "eval_pr_auc": 0.6976584248764129,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.463,
+ "eval_steps_per_second": 3.972,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10998
+ },
+ {
+ "epoch": 423.0769230769231,
+ "grad_norm": 19880.513671875,
+ "learning_rate": 7.045132214180816e-08,
+ "loss": 0.198,
+ "step": 11000
+ },
+ {
+ "epoch": 424.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933567850223537,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.2681238353252411,
+ "eval_pr_auc": 0.6977041251052366,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.1802,
+ "eval_samples_per_second": 904.736,
+ "eval_steps_per_second": 5.551,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11024
+ },
+ {
+ "epoch": 425.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335591688344673,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26810285449028015,
+ "eval_pr_auc": 0.6976771787125668,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 673.038,
+ "eval_steps_per_second": 4.129,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11050
+ },
+ {
+ "epoch": 426.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335456990559106,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2680869400501251,
+ "eval_pr_auc": 0.6976467269760291,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.1727,
+ "eval_samples_per_second": 943.897,
+ "eval_steps_per_second": 5.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11076
+ },
+ {
+ "epoch": 427.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335531541501119,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004412485700278,
+ "eval_f1_macro": 0.8226293306702985,
+ "eval_loss": 0.26812946796417236,
+ "eval_pr_auc": 0.6976292182478663,
+ "eval_precision": 0.7100728959575878,
+ "eval_precision_macro": 0.8262676792100252,
+ "eval_pred_class_0": 16650,
+ "eval_pred_class_1": 3018,
+ "eval_predicted_binding_ratio": 0.15344722391702256,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8191257794490772,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.952,
+ "eval_steps_per_second": 3.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11102
+ },
+ {
+ "epoch": 428.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335772907475254,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26813971996307373,
+ "eval_pr_auc": 0.6977601654966087,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.2428,
+ "eval_samples_per_second": 671.434,
+ "eval_steps_per_second": 4.119,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11128
+ },
+ {
+ "epoch": 429.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9335880938213678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.26816368103027344,
+ "eval_pr_auc": 0.6977910817778257,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.1821,
+ "eval_samples_per_second": 895.286,
+ "eval_steps_per_second": 5.493,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11154
+ },
+ {
+ "epoch": 430.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9335897288811925,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993945344460808,
+ "eval_f1_macro": 0.8220524214743572,
+ "eval_loss": 0.2681582272052765,
+ "eval_pr_auc": 0.6978031299323104,
+ "eval_precision": 0.7099667774086379,
+ "eval_precision_macro": 0.8260483424802825,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182187105462742,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.36,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11180
+ },
+ {
+ "epoch": 431.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9336081038392237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990831696136215,
+ "eval_f1_macro": 0.8218841874311036,
+ "eval_loss": 0.2681788206100464,
+ "eval_pr_auc": 0.6978648412555615,
+ "eval_precision": 0.7100099767209843,
+ "eval_precision_macro": 0.8260151318092648,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8179264144050951,
+ "eval_runtime": 0.1932,
+ "eval_samples_per_second": 843.696,
+ "eval_steps_per_second": 5.176,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11206
+ },
+ {
+ "epoch": 432.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.933607091659332,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.26817184686660767,
+ "eval_pr_auc": 0.6978628373729787,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.2181,
+ "eval_samples_per_second": 747.26,
+ "eval_steps_per_second": 4.584,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11232
+ },
+ {
+ "epoch": 433.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336009699174881,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26814383268356323,
+ "eval_pr_auc": 0.6978726222778764,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.607,
+ "eval_steps_per_second": 3.875,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11258
+ },
+ {
+ "epoch": 434.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335997338901205,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26813551783561707,
+ "eval_pr_auc": 0.6978611289947059,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.1828,
+ "eval_samples_per_second": 891.596,
+ "eval_steps_per_second": 5.47,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11284
+ },
+ {
+ "epoch": 435.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335949455006336,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001308044473512,
+ "eval_f1_macro": 0.8224615491231337,
+ "eval_loss": 0.2681216299533844,
+ "eval_pr_auc": 0.6978496657032133,
+ "eval_precision": 0.7101160862354893,
+ "eval_precision_macro": 0.8262344077367322,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188334833078981,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.033,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11310
+ },
+ {
+ "epoch": 436.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933600045330087,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001308044473512,
+ "eval_f1_macro": 0.8224615491231337,
+ "eval_loss": 0.2681162655353546,
+ "eval_pr_auc": 0.6978775373770459,
+ "eval_precision": 0.7101160862354893,
+ "eval_precision_macro": 0.8262344077367322,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188334833078981,
+ "eval_runtime": 0.2578,
+ "eval_samples_per_second": 632.163,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11336
+ },
+ {
+ "epoch": 437.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336053398095198,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.26813197135925293,
+ "eval_pr_auc": 0.6979041519553001,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.1888,
+ "eval_samples_per_second": 863.236,
+ "eval_steps_per_second": 5.296,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11362
+ },
+ {
+ "epoch": 438.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933606196269428,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26813068985939026,
+ "eval_pr_auc": 0.6979019651404079,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2467,
+ "eval_samples_per_second": 660.854,
+ "eval_steps_per_second": 4.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11388
+ },
+ {
+ "epoch": 439.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336083958141924,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26813721656799316,
+ "eval_pr_auc": 0.6979009956604231,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1795,
+ "eval_samples_per_second": 908.065,
+ "eval_steps_per_second": 5.571,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11414
+ },
+ {
+ "epoch": 440.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336056512494865,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681269645690918,
+ "eval_pr_auc": 0.697889844022227,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.569,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11440
+ },
+ {
+ "epoch": 441.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336110917164033,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681290805339813,
+ "eval_pr_auc": 0.6979179015521837,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.824,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11466
+ },
+ {
+ "epoch": 442.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336152864234539,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681342363357544,
+ "eval_pr_auc": 0.697937589363129,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1746,
+ "eval_samples_per_second": 933.511,
+ "eval_steps_per_second": 5.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11492
+ },
+ {
+ "epoch": 442.3076923076923,
+ "grad_norm": 19259.90625,
+ "learning_rate": 4.0062918659231006e-08,
+ "loss": 0.1984,
+ "step": 11500
+ },
+ {
+ "epoch": 443.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336222548927073,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.26813840866088867,
+ "eval_pr_auc": 0.6979755820701472,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.189,
+ "eval_samples_per_second": 862.533,
+ "eval_steps_per_second": 5.292,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11518
+ },
+ {
+ "epoch": 444.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336241624625029,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.2681417763233185,
+ "eval_pr_auc": 0.6980183846984902,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.1783,
+ "eval_samples_per_second": 913.982,
+ "eval_steps_per_second": 5.607,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11544
+ },
+ {
+ "epoch": 445.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933623228142603,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26812103390693665,
+ "eval_pr_auc": 0.6980203083536239,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.813,
+ "eval_steps_per_second": 4.06,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11570
+ },
+ {
+ "epoch": 446.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933622994562628,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26812058687210083,
+ "eval_pr_auc": 0.6980222288630917,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.25,
+ "eval_samples_per_second": 651.994,
+ "eval_steps_per_second": 4.0,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11596
+ },
+ {
+ "epoch": 447.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336267026447306,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681138813495636,
+ "eval_pr_auc": 0.6980542679927909,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1865,
+ "eval_samples_per_second": 873.899,
+ "eval_steps_per_second": 5.361,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11622
+ },
+ {
+ "epoch": 448.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336341674714307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26811888813972473,
+ "eval_pr_auc": 0.6980920829430033,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.772,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11648
+ },
+ {
+ "epoch": 449.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336351017913307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001472272206772,
+ "eval_f1_macro": 0.8224873029219602,
+ "eval_loss": 0.26812025904655457,
+ "eval_pr_auc": 0.6980982354073687,
+ "eval_precision": 0.7104913678618858,
+ "eval_precision_macro": 0.8263972209146124,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187326059562077,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.262,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11674
+ },
+ {
+ "epoch": 450.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336350044663411,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681255340576172,
+ "eval_pr_auc": 0.69809285211945,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1865,
+ "eval_samples_per_second": 874.141,
+ "eval_steps_per_second": 5.363,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11700
+ },
+ {
+ "epoch": 451.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336387320134417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2681383192539215,
+ "eval_pr_auc": 0.6981051991963814,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.252,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11726
+ },
+ {
+ "epoch": 452.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336409802207007,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003598298985934,
+ "eval_f1_macro": 0.8226078241660808,
+ "eval_loss": 0.26812514662742615,
+ "eval_pr_auc": 0.6981289571890097,
+ "eval_precision": 0.7105874543644208,
+ "eval_precision_macro": 0.8264735530603251,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188938442664302,
+ "eval_runtime": 0.1751,
+ "eval_samples_per_second": 930.856,
+ "eval_steps_per_second": 5.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11752
+ },
+ {
+ "epoch": 453.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336424400955443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26812946796417236,
+ "eval_pr_auc": 0.6981354214954091,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.502,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11778
+ },
+ {
+ "epoch": 454.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336481628049311,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681373655796051,
+ "eval_pr_auc": 0.6981529233150263,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2333,
+ "eval_samples_per_second": 698.566,
+ "eval_steps_per_second": 4.286,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11804
+ },
+ {
+ "epoch": 455.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336448537552857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26815658807754517,
+ "eval_pr_auc": 0.6981319681162118,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1829,
+ "eval_samples_per_second": 891.289,
+ "eval_steps_per_second": 5.468,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11830
+ },
+ {
+ "epoch": 456.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933646138445148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.268153578042984,
+ "eval_pr_auc": 0.6981406301220767,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.693,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11856
+ },
+ {
+ "epoch": 457.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.933646138445148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.268160343170166,
+ "eval_pr_auc": 0.6981424953255787,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2524,
+ "eval_samples_per_second": 645.855,
+ "eval_steps_per_second": 3.962,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11882
+ },
+ {
+ "epoch": 458.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336505959296704,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.26815977692604065,
+ "eval_pr_auc": 0.6981608628032375,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.1785,
+ "eval_samples_per_second": 913.407,
+ "eval_steps_per_second": 5.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11908
+ },
+ {
+ "epoch": 459.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336521336645056,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26815950870513916,
+ "eval_pr_auc": 0.6981611753342029,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1894,
+ "eval_samples_per_second": 860.719,
+ "eval_steps_per_second": 5.28,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11934
+ },
+ {
+ "epoch": 460.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336499146547433,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2681548595428467,
+ "eval_pr_auc": 0.6981527417806164,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2094,
+ "eval_samples_per_second": 778.376,
+ "eval_steps_per_second": 4.775,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11960
+ },
+ {
+ "epoch": 461.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336533599593742,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681526839733124,
+ "eval_pr_auc": 0.6981686335311912,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.254,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11986
+ },
+ {
+ "epoch": 461.53846153846155,
+ "grad_norm": 19181.365234375,
+ "learning_rate": 1.7952297882945e-08,
+ "loss": 0.1977,
+ "step": 12000
+ },
+ {
+ "epoch": 462.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336556178991323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26815271377563477,
+ "eval_pr_auc": 0.6981807528411922,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2521,
+ "eval_samples_per_second": 646.614,
+ "eval_steps_per_second": 3.967,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12012
+ },
+ {
+ "epoch": 463.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336526397544513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.2681511640548706,
+ "eval_pr_auc": 0.6981676703517014,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2048,
+ "eval_samples_per_second": 795.899,
+ "eval_steps_per_second": 4.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12038
+ },
+ {
+ "epoch": 464.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336516859695536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26814842224121094,
+ "eval_pr_auc": 0.698161790632896,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.01,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12064
+ },
+ {
+ "epoch": 465.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336534183543679,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26814672350883484,
+ "eval_pr_auc": 0.6981725600302674,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1868,
+ "eval_samples_per_second": 872.776,
+ "eval_steps_per_second": 5.354,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12090
+ },
+ {
+ "epoch": 466.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336528344044305,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.2681439220905304,
+ "eval_pr_auc": 0.6981719802867735,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.256,
+ "eval_steps_per_second": 3.94,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12116
+ },
+ {
+ "epoch": 467.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336571167039716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26814955472946167,
+ "eval_pr_auc": 0.6981930050756842,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.332,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12142
+ },
+ {
+ "epoch": 468.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336574573414352,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26814743876457214,
+ "eval_pr_auc": 0.6981957415820915,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.1759,
+ "eval_samples_per_second": 926.564,
+ "eval_steps_per_second": 5.684,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12168
+ },
+ {
+ "epoch": 469.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336602894986317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681457996368408,
+ "eval_pr_auc": 0.6982076318844164,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.1997,
+ "eval_samples_per_second": 816.139,
+ "eval_steps_per_second": 5.007,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12194
+ },
+ {
+ "epoch": 470.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336571945639633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681419849395752,
+ "eval_pr_auc": 0.6981934072595471,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.578,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12220
+ },
+ {
+ "epoch": 471.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336578855713892,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681434154510498,
+ "eval_pr_auc": 0.6981968133129176,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.471,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12246
+ },
+ {
+ "epoch": 472.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336576422589153,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26814115047454834,
+ "eval_pr_auc": 0.6981937092453367,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.347,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12272
+ },
+ {
+ "epoch": 473.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933659540096212,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26814183592796326,
+ "eval_pr_auc": 0.698207050020266,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1807,
+ "eval_samples_per_second": 902.095,
+ "eval_steps_per_second": 5.534,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12298
+ },
+ {
+ "epoch": 474.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.933661126493542,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813554763793945,
+ "eval_pr_auc": 0.6982130837277154,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2369,
+ "eval_samples_per_second": 688.198,
+ "eval_steps_per_second": 4.222,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12324
+ },
+ {
+ "epoch": 475.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606204035962,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813259720802307,
+ "eval_pr_auc": 0.6982118437878516,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.1655,
+ "eval_samples_per_second": 984.988,
+ "eval_steps_per_second": 6.043,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12350
+ },
+ {
+ "epoch": 476.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.933660883181068,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813384890556335,
+ "eval_pr_auc": 0.6982130674055568,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2472,
+ "eval_samples_per_second": 659.287,
+ "eval_steps_per_second": 4.045,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12376
+ },
+ {
+ "epoch": 477.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336603284286276,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681376338005066,
+ "eval_pr_auc": 0.6982114384730127,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.234,
+ "eval_samples_per_second": 696.525,
+ "eval_steps_per_second": 4.273,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12402
+ },
+ {
+ "epoch": 478.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606982635879,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26814013719558716,
+ "eval_pr_auc": 0.6982139708732891,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2492,
+ "eval_samples_per_second": 654.084,
+ "eval_steps_per_second": 4.013,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12428
+ },
+ {
+ "epoch": 479.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606593335921,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813676953315735,
+ "eval_pr_auc": 0.6982155837128797,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2089,
+ "eval_samples_per_second": 780.114,
+ "eval_steps_per_second": 4.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12454
+ },
+ {
+ "epoch": 480.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606009385984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813599467277527,
+ "eval_pr_auc": 0.6982237203295948,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 682.114,
+ "eval_steps_per_second": 4.185,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12480
+ },
+ {
+ "epoch": 480.7692307692308,
+ "grad_norm": 19666.140625,
+ "learning_rate": 4.5173988392051e-09,
+ "loss": 0.1976,
+ "step": 12500
+ },
+ {
+ "epoch": 481.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336608247860743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813769340515137,
+ "eval_pr_auc": 0.6982221169303999,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2452,
+ "eval_samples_per_second": 664.649,
+ "eval_steps_per_second": 4.078,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12506
+ },
+ {
+ "epoch": 482.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336611459585399,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681364417076111,
+ "eval_pr_auc": 0.6982243970162039,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.641,
+ "eval_steps_per_second": 3.986,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12532
+ },
+ {
+ "epoch": 483.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336621192084356,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681383192539215,
+ "eval_pr_auc": 0.6982286439182355,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.229,
+ "eval_samples_per_second": 711.694,
+ "eval_steps_per_second": 4.366,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12558
+ },
+ {
+ "epoch": 484.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336621970684273,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681386172771454,
+ "eval_pr_auc": 0.6982328773712362,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.789,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12584
+ },
+ {
+ "epoch": 485.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336618856284606,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681376338005066,
+ "eval_pr_auc": 0.6982308470568848,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.4219,
+ "eval_samples_per_second": 386.39,
+ "eval_steps_per_second": 2.37,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12610
+ },
+ {
+ "epoch": 486.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336622749284189,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813745498657227,
+ "eval_pr_auc": 0.6982319234202713,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.256,
+ "eval_steps_per_second": 3.848,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12636
+ },
+ {
+ "epoch": 487.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626836933752,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813769340515137,
+ "eval_pr_auc": 0.6982353364927889,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.782,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12662
+ },
+ {
+ "epoch": 488.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336625766358866,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813805103302,
+ "eval_pr_auc": 0.6982371615828771,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.662,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12688
+ },
+ {
+ "epoch": 489.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626642283772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982364881625377,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.289,
+ "eval_steps_per_second": 3.793,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12714
+ },
+ {
+ "epoch": 490.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626642283772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982365330396263,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 632.058,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12740
+ },
+ {
+ "epoch": 491.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933662722623371,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982369774278672,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2559,
+ "eval_samples_per_second": 636.959,
+ "eval_steps_per_second": 3.908,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12766
+ },
+ {
+ "epoch": 492.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627420883689,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982367884435748,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1891,
+ "eval_samples_per_second": 862.094,
+ "eval_steps_per_second": 5.289,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12792
+ },
+ {
+ "epoch": 493.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336628199483605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982375574473259,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1955,
+ "eval_samples_per_second": 833.803,
+ "eval_steps_per_second": 5.115,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12818
+ },
+ {
+ "epoch": 494.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627810183648,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982374868010095,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.752,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12844
+ },
+ {
+ "epoch": 495.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336628296808595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982373741196756,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2245,
+ "eval_samples_per_second": 726.009,
+ "eval_steps_per_second": 4.454,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12870
+ },
+ {
+ "epoch": 496.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627712858657,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982369260304812,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2507,
+ "eval_samples_per_second": 650.217,
+ "eval_steps_per_second": 3.989,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12896
+ },
+ {
+ "epoch": 497.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627615533667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.698237407972925,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.887,
+ "eval_steps_per_second": 4.061,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12922
+ },
+ {
+ "epoch": 498.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933662722623371,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982366928868454,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2494,
+ "eval_samples_per_second": 653.517,
+ "eval_steps_per_second": 4.009,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12948
+ },
+ {
+ "epoch": 499.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627712858656,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982368906715606,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1725,
+ "eval_samples_per_second": 945.076,
+ "eval_steps_per_second": 5.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12974
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 13000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 500,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.0614503790871344e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/training_args.bin b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..43cdbe5adfb2a2b6cd48f66b6b5e6b0cc84c9bc3
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12974/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fce922531bcc60b40ec3cfe0214120623a297c18ab37c3a2e94007f715374c7
+size 5368
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/config.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a424582b16776120730808f810ad9081375d580e
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/config.json
@@ -0,0 +1,52 @@
+{
+ "architectures": [
+ "GloMeModelForTokenClassification"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "cdr_weight": 0.0,
+ "class_weights": [
+ 0.1,
+ 0.9
+ ],
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.1,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "use_glome": true,
+ "use_nsa": true,
+ "vocab_size": 36
+}
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/model.safetensors b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..38fc66a65d33485164d13ff484750b61d1c7ae4e
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0af5cd7ea1c961f575b49c1824e246627fe1431012f8c7aefb4c304043a5c505
+size 61385376
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/optimizer.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d99c6676d0678ad3855f03b029d3179e2a68a15
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:787d59fa175cff3add21d5089a62d37a22bcfa9310a078ec8ca0ef6cce84aad0
+size 122881658
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/rng_state.pth b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0fc6ee76cbc69eb6b79b7b3033a135464af14093
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea30593948da7f35739a54c03f3cb3e2f22c1412ec5c93e4c8da21c76e0b76a
+size 14244
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scaler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scheduler.pt b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e5d7ff8cd396b769c0bdea29d5030512c000100
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5e9d89428155a2a0813f903fdb3c0bafa943e8b525b97a6c7f8e2bc07ececa9
+size 1064
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/trainer_state.json b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..4de772fdedbb7f399373859e1a1d93159f89c7e6
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/trainer_state.json
@@ -0,0 +1,12216 @@
+{
+ "best_global_step": 12818,
+ "best_metric": 0.6982375574473259,
+ "best_model_checkpoint": "./results/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-12818",
+ "epoch": 500.0,
+ "eval_steps": 500,
+ "global_step": 13000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.3402481187716087,
+ "eval_auc": 0.3906724936824889,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25902238465052535,
+ "eval_f1_macro": 0.3322238022959372,
+ "eval_loss": 1.061540961265564,
+ "eval_pr_auc": 0.12123677424188789,
+ "eval_precision": 0.15737977933523004,
+ "eval_precision_macro": 0.49946219326282143,
+ "eval_pred_class_0": 5257,
+ "eval_pred_class_1": 14411,
+ "eval_predicted_binding_ratio": 0.7327130364043116,
+ "eval_recall": 0.7313769751693002,
+ "eval_recall_macro": 0.49920692785748166,
+ "eval_runtime": 0.3106,
+ "eval_samples_per_second": 524.711,
+ "eval_steps_per_second": 3.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 26
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.34141753101484645,
+ "eval_auc": 0.39093619574173194,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25910884859577876,
+ "eval_f1_macro": 0.33318779271412513,
+ "eval_loss": 1.0595855712890625,
+ "eval_pr_auc": 0.12129083172780017,
+ "eval_precision": 0.15748852732582394,
+ "eval_precision_macro": 0.4996674570038125,
+ "eval_pred_class_0": 5286,
+ "eval_pred_class_1": 14382,
+ "eval_predicted_binding_ratio": 0.7312385600976204,
+ "eval_recall": 0.7304095453079652,
+ "eval_recall_macro": 0.4995079053877304,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.188,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 52
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.3436038234695953,
+ "eval_auc": 0.3913807276315981,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2594079853143644,
+ "eval_f1_macro": 0.3350089597864736,
+ "eval_loss": 1.0562976598739624,
+ "eval_pr_auc": 0.1213805792649038,
+ "eval_precision": 0.15776986951364175,
+ "eval_precision_macro": 0.5001890381857135,
+ "eval_pred_class_0": 5337,
+ "eval_pred_class_1": 14331,
+ "eval_predicted_binding_ratio": 0.7286455155582673,
+ "eval_recall": 0.7291196388261851,
+ "eval_recall_macro": 0.5002814346723429,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.877,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 78
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.345688427903193,
+ "eval_auc": 0.39204411422551294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25976416450963474,
+ "eval_f1_macro": 0.3367519287310599,
+ "eval_loss": 1.0516862869262695,
+ "eval_pr_auc": 0.1215177922821225,
+ "eval_precision": 0.15807896947633715,
+ "eval_precision_macro": 0.5007519661646174,
+ "eval_pred_class_0": 5384,
+ "eval_pred_class_1": 14284,
+ "eval_predicted_binding_ratio": 0.7262558470612162,
+ "eval_recall": 0.72815220896485,
+ "eval_recall_macro": 0.5011256608293798,
+ "eval_runtime": 0.2689,
+ "eval_samples_per_second": 606.218,
+ "eval_steps_per_second": 3.719,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 104
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.3489424445800285,
+ "eval_auc": 0.39286881698964193,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25986937171261776,
+ "eval_f1_macro": 0.33937421387990774,
+ "eval_loss": 1.0457645654678345,
+ "eval_pr_auc": 0.12168361829310792,
+ "eval_precision": 0.15830985915492957,
+ "eval_precision_macro": 0.5011556611063601,
+ "eval_pred_class_0": 5468,
+ "eval_pred_class_1": 14200,
+ "eval_predicted_binding_ratio": 0.7219849501728697,
+ "eval_recall": 0.7249274427603999,
+ "eval_recall_macro": 0.5017466331928395,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.89,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 130
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.3526540573520439,
+ "eval_auc": 0.3938679358675814,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2595952547103978,
+ "eval_f1_macro": 0.34226375201644554,
+ "eval_loss": 1.0385552644729614,
+ "eval_pr_auc": 0.12189495582289459,
+ "eval_precision": 0.15835402625044342,
+ "eval_precision_macro": 0.5012118238196412,
+ "eval_pred_class_0": 5573,
+ "eval_pred_class_1": 14095,
+ "eval_predicted_binding_ratio": 0.7166463290624364,
+ "eval_recall": 0.7197678168332796,
+ "eval_recall_macro": 0.5018528828839544,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.675,
+ "eval_steps_per_second": 3.728,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 156
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.3575859263778727,
+ "eval_auc": 0.39509779283079605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25933524825605253,
+ "eval_f1_macro": 0.34607916966826957,
+ "eval_loss": 1.0300335884094238,
+ "eval_pr_auc": 0.12215992714628282,
+ "eval_precision": 0.1584754262788365,
+ "eval_precision_macro": 0.5013918287261083,
+ "eval_pred_class_0": 5710,
+ "eval_pred_class_1": 13958,
+ "eval_predicted_binding_ratio": 0.7096806996135855,
+ "eval_recall": 0.7133182844243793,
+ "eval_recall_macro": 0.5021592327536275,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.797,
+ "eval_steps_per_second": 3.956,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 182
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.36261948342485256,
+ "eval_auc": 0.39656283563130934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2583126257247663,
+ "eval_f1_macro": 0.3497589695442054,
+ "eval_loss": 1.0202081203460693,
+ "eval_pr_auc": 0.12247236024679278,
+ "eval_precision": 0.1581769436997319,
+ "eval_precision_macro": 0.5008542806107318,
+ "eval_pred_class_0": 5867,
+ "eval_pred_class_1": 13801,
+ "eval_predicted_binding_ratio": 0.7016981899532235,
+ "eval_recall": 0.7039664624314738,
+ "eval_recall_macro": 0.5013464231032241,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.667,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 208
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.3690258287573724,
+ "eval_auc": 0.39822865015280895,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25848470363288717,
+ "eval_f1_macro": 0.3546848296925498,
+ "eval_loss": 1.0091010332107544,
+ "eval_pr_auc": 0.12282975659183427,
+ "eval_precision": 0.15863586358635864,
+ "eval_precision_macro": 0.5015788301853557,
+ "eval_pred_class_0": 6033,
+ "eval_pred_class_1": 13635,
+ "eval_predicted_binding_ratio": 0.6932580841976815,
+ "eval_recall": 0.6975169300225733,
+ "eval_recall_macro": 0.5025280068716114,
+ "eval_runtime": 0.2623,
+ "eval_samples_per_second": 621.417,
+ "eval_steps_per_second": 3.812,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 234
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.37553386211104334,
+ "eval_auc": 0.4001638991754374,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25788519637462237,
+ "eval_f1_macro": 0.35943500580602444,
+ "eval_loss": 0.9966734647750854,
+ "eval_pr_auc": 0.12325069957928089,
+ "eval_precision": 0.1586735073239646,
+ "eval_precision_macro": 0.5015911353953799,
+ "eval_pred_class_0": 6219,
+ "eval_pred_class_1": 13449,
+ "eval_predicted_binding_ratio": 0.6838010982306284,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.5025904311199223,
+ "eval_runtime": 0.2668,
+ "eval_samples_per_second": 610.918,
+ "eval_steps_per_second": 3.748,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 260
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.38382143583485867,
+ "eval_auc": 0.4023744221985687,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25855001529519733,
+ "eval_f1_macro": 0.3657153538700335,
+ "eval_loss": 0.9828852415084839,
+ "eval_pr_auc": 0.12373084625745168,
+ "eval_precision": 0.15954394442766537,
+ "eval_precision_macro": 0.5028728439448414,
+ "eval_pred_class_0": 6424,
+ "eval_pred_class_1": 13244,
+ "eval_predicted_binding_ratio": 0.6733780760626398,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.5047576347901956,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.475,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 286
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.3912955053894651,
+ "eval_auc": 0.40482715792324586,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2574122317330356,
+ "eval_f1_macro": 0.370844480646392,
+ "eval_loss": 0.9678097367286682,
+ "eval_pr_auc": 0.12427357405982056,
+ "eval_precision": 0.15935796021810922,
+ "eval_precision_macro": 0.5025013059703454,
+ "eval_pred_class_0": 6647,
+ "eval_pred_class_1": 13021,
+ "eval_predicted_binding_ratio": 0.6620398617042912,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.5042139676659523,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.751,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 312
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.4013117754728493,
+ "eval_auc": 0.40764224431659535,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2572383775941462,
+ "eval_f1_macro": 0.3779059068484294,
+ "eval_loss": 0.9513856172561646,
+ "eval_pr_auc": 0.12488748600523823,
+ "eval_precision": 0.15989648682559598,
+ "eval_precision_macro": 0.5031697587395765,
+ "eval_pred_class_0": 6916,
+ "eval_pred_class_1": 12752,
+ "eval_predicted_binding_ratio": 0.6483628228594671,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.5054414401669225,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.633,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 338
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.4099552572706935,
+ "eval_auc": 0.4108023769954713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.25432114630855235,
+ "eval_f1_macro": 0.38308115532732967,
+ "eval_loss": 0.9335527420043945,
+ "eval_pr_auc": 0.12558462856716973,
+ "eval_precision": 0.15880276039159044,
+ "eval_precision_macro": 0.5015495900209409,
+ "eval_pred_class_0": 7206,
+ "eval_pred_class_1": 12462,
+ "eval_predicted_binding_ratio": 0.6336180597925565,
+ "eval_recall": 0.6381812318606901,
+ "eval_recall_macro": 0.5027086517847544,
+ "eval_runtime": 0.2682,
+ "eval_samples_per_second": 607.718,
+ "eval_steps_per_second": 3.728,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 364
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.41844620703681107,
+ "eval_auc": 0.4144857969457745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2516356974613975,
+ "eval_f1_macro": 0.3880413644466475,
+ "eval_loss": 0.9142351150512695,
+ "eval_pr_auc": 0.1264136402678906,
+ "eval_precision": 0.15784289583846342,
+ "eval_precision_macro": 0.5002307331563727,
+ "eval_pred_class_0": 7485,
+ "eval_pred_class_1": 12183,
+ "eval_predicted_binding_ratio": 0.6194325808419768,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.5004095532886144,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.136,
+ "eval_steps_per_second": 3.958,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 390
+ },
+ {
+ "epoch": 16.0,
+ "eval_accuracy": 0.42897091722595077,
+ "eval_auc": 0.41858189431685716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24961582147390926,
+ "eval_f1_macro": 0.3943717007980979,
+ "eval_loss": 0.8934236168861389,
+ "eval_pr_auc": 0.12736412734017702,
+ "eval_precision": 0.15742457441429294,
+ "eval_precision_macro": 0.4996940867457263,
+ "eval_pred_class_0": 7802,
+ "eval_pred_class_1": 11866,
+ "eval_predicted_binding_ratio": 0.6033150294895261,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.4994487317940711,
+ "eval_runtime": 0.2416,
+ "eval_samples_per_second": 674.578,
+ "eval_steps_per_second": 4.139,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 416
+ },
+ {
+ "epoch": 17.0,
+ "eval_accuracy": 0.4378177750660972,
+ "eval_auc": 0.42318268015385996,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.24520445081575534,
+ "eval_f1_macro": 0.3986584493314002,
+ "eval_loss": 0.8710600733757019,
+ "eval_pr_auc": 0.12844830521974454,
+ "eval_precision": 0.15552476619328023,
+ "eval_precision_macro": 0.4974052402394973,
+ "eval_pred_class_0": 8120,
+ "eval_pred_class_1": 11548,
+ "eval_predicted_binding_ratio": 0.5871466341264999,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.4952639713574891,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.27,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 442
+ },
+ {
+ "epoch": 18.0,
+ "eval_accuracy": 0.4492576774455969,
+ "eval_auc": 0.4283753771124365,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23911211014329867,
+ "eval_f1_macro": 0.4037791626413705,
+ "eval_loss": 0.8472632765769958,
+ "eval_pr_auc": 0.12969206942947384,
+ "eval_precision": 0.15285136955545578,
+ "eval_precision_macro": 0.4944498263457579,
+ "eval_pred_class_0": 8533,
+ "eval_pred_class_1": 11135,
+ "eval_predicted_binding_ratio": 0.5661480577587961,
+ "eval_recall": 0.5488552079974202,
+ "eval_recall_macro": 0.4897351430824307,
+ "eval_runtime": 0.281,
+ "eval_samples_per_second": 580.087,
+ "eval_steps_per_second": 3.559,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 468
+ },
+ {
+ "epoch": 19.0,
+ "eval_accuracy": 0.46339231238560097,
+ "eval_auc": 0.4341240430739382,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.23588184187662903,
+ "eval_f1_macro": 0.41119432949496704,
+ "eval_loss": 0.8218646049499512,
+ "eval_pr_auc": 0.13110819406948146,
+ "eval_precision": 0.15208663990290355,
+ "eval_precision_macro": 0.4938729504080779,
+ "eval_pred_class_0": 8957,
+ "eval_pred_class_1": 10711,
+ "eval_predicted_binding_ratio": 0.544590197274761,
+ "eval_recall": 0.5253144147049339,
+ "eval_recall_macro": 0.4885580946585574,
+ "eval_runtime": 0.2447,
+ "eval_samples_per_second": 666.13,
+ "eval_steps_per_second": 4.087,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 494
+ },
+ {
+ "epoch": 19.23076923076923,
+ "grad_norm": 232728.109375,
+ "learning_rate": 3.8384615384615384e-07,
+ "loss": 0.99,
+ "step": 500
+ },
+ {
+ "epoch": 20.0,
+ "eval_accuracy": 0.4798657718120805,
+ "eval_auc": 0.44078220133048324,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2309427153811457,
+ "eval_f1_macro": 0.4189975157915178,
+ "eval_loss": 0.7945711016654968,
+ "eval_pr_auc": 0.1327664236209388,
+ "eval_precision": 0.15057347318890305,
+ "eval_precision_macro": 0.49263119629657465,
+ "eval_pred_class_0": 9467,
+ "eval_pred_class_1": 10201,
+ "eval_predicted_binding_ratio": 0.5186597518812284,
+ "eval_recall": 0.49532408900354724,
+ "eval_recall_macro": 0.4861481916617905,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.734,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 520
+ },
+ {
+ "epoch": 21.0,
+ "eval_accuracy": 0.49964409192597115,
+ "eval_auc": 0.4482004774880778,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22407947646455886,
+ "eval_f1_macro": 0.42742637388305044,
+ "eval_loss": 0.765658974647522,
+ "eval_pr_auc": 0.1347110745909903,
+ "eval_precision": 0.14829889375913172,
+ "eval_precision_macro": 0.4908656872127009,
+ "eval_pred_class_0": 10086,
+ "eval_pred_class_1": 9582,
+ "eval_predicted_binding_ratio": 0.48718730933496035,
+ "eval_recall": 0.4582392776523702,
+ "eval_recall_macro": 0.48281674753627146,
+ "eval_runtime": 0.2494,
+ "eval_samples_per_second": 653.647,
+ "eval_steps_per_second": 4.01,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 546
+ },
+ {
+ "epoch": 22.0,
+ "eval_accuracy": 0.5294895261338214,
+ "eval_auc": 0.4563512312496838,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.22025615099427032,
+ "eval_f1_macro": 0.44167751484473966,
+ "eval_loss": 0.7351489067077637,
+ "eval_pr_auc": 0.13684095333600696,
+ "eval_precision": 0.14908178396258698,
+ "eval_precision_macro": 0.49225486317659667,
+ "eval_pred_class_0": 10901,
+ "eval_pred_class_1": 8767,
+ "eval_predicted_binding_ratio": 0.4457494407158837,
+ "eval_recall": 0.4214769429216382,
+ "eval_recall_macro": 0.48559209613637894,
+ "eval_runtime": 0.2523,
+ "eval_samples_per_second": 645.993,
+ "eval_steps_per_second": 3.963,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 572
+ },
+ {
+ "epoch": 23.0,
+ "eval_accuracy": 0.564317673378076,
+ "eval_auc": 0.46539531162556536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21679919568595193,
+ "eval_f1_macro": 0.45751035677940843,
+ "eval_loss": 0.7033414244651794,
+ "eval_pr_auc": 0.1392772958743257,
+ "eval_precision": 0.15127551020408164,
+ "eval_precision_macro": 0.49468577674559844,
+ "eval_pred_class_0": 11828,
+ "eval_pred_class_1": 7840,
+ "eval_predicted_binding_ratio": 0.3986170429123449,
+ "eval_recall": 0.38245727184779105,
+ "eval_recall_macro": 0.49040772688786005,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.699,
+ "eval_steps_per_second": 3.753,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 598
+ },
+ {
+ "epoch": 24.0,
+ "eval_accuracy": 0.6033150294895261,
+ "eval_auc": 0.4754341993823483,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.21223747980613894,
+ "eval_f1_macro": 0.4735759293567254,
+ "eval_loss": 0.6706362962722778,
+ "eval_pr_auc": 0.14231930535250045,
+ "eval_precision": 0.1544906658826988,
+ "eval_precision_macro": 0.4975718001003078,
+ "eval_pred_class_0": 12865,
+ "eval_pred_class_1": 6803,
+ "eval_predicted_binding_ratio": 0.34589180394549524,
+ "eval_recall": 0.3389229280877136,
+ "eval_recall_macro": 0.49586334730576304,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.356,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 624
+ },
+ {
+ "epoch": 25.0,
+ "eval_accuracy": 0.6474984746796827,
+ "eval_auc": 0.4864891211002582,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2075665790376043,
+ "eval_f1_macro": 0.4904508280155492,
+ "eval_loss": 0.6374054551124573,
+ "eval_pr_auc": 0.14557281943245967,
+ "eval_precision": 0.16076487252124647,
+ "eval_precision_macro": 0.5021727358326632,
+ "eval_pred_class_0": 14020,
+ "eval_pred_class_1": 5648,
+ "eval_predicted_binding_ratio": 0.28716697173073014,
+ "eval_recall": 0.2928087713640761,
+ "eval_recall_macro": 0.5033489139611471,
+ "eval_runtime": 0.2341,
+ "eval_samples_per_second": 696.138,
+ "eval_steps_per_second": 4.271,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 650
+ },
+ {
+ "epoch": 26.0,
+ "eval_accuracy": 0.6925462680496237,
+ "eval_auc": 0.49869307137754393,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20360858685631503,
+ "eval_f1_macro": 0.5065549471155847,
+ "eval_loss": 0.6044979691505432,
+ "eval_pr_auc": 0.14935675594952297,
+ "eval_precision": 0.17208370436331255,
+ "eval_precision_macro": 0.5093417994668434,
+ "eval_pred_class_0": 15176,
+ "eval_pred_class_1": 4492,
+ "eval_predicted_binding_ratio": 0.22839129550538947,
+ "eval_recall": 0.2492744276039987,
+ "eval_recall_macro": 0.5123960114117054,
+ "eval_runtime": 0.2543,
+ "eval_samples_per_second": 640.94,
+ "eval_steps_per_second": 3.932,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 676
+ },
+ {
+ "epoch": 27.0,
+ "eval_accuracy": 0.7326113483831604,
+ "eval_auc": 0.5121322314924708,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.1977116704805492,
+ "eval_f1_macro": 0.5186416868006297,
+ "eval_loss": 0.5727357268333435,
+ "eval_pr_auc": 0.15383837227298106,
+ "eval_precision": 0.18760856977417487,
+ "eval_precision_macro": 0.518159780138105,
+ "eval_pred_class_0": 16214,
+ "eval_pred_class_1": 3454,
+ "eval_predicted_binding_ratio": 0.1756152125279642,
+ "eval_recall": 0.20896485004837148,
+ "eval_recall_macro": 0.5197960002037596,
+ "eval_runtime": 0.2506,
+ "eval_samples_per_second": 650.309,
+ "eval_steps_per_second": 3.99,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 702
+ },
+ {
+ "epoch": 28.0,
+ "eval_accuracy": 0.7681513117754728,
+ "eval_auc": 0.5270188672472933,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18947742623533595,
+ "eval_f1_macro": 0.5271029967130403,
+ "eval_loss": 0.5426873564720154,
+ "eval_pr_auc": 0.1589999639181036,
+ "eval_precision": 0.21108910891089108,
+ "eval_precision_macro": 0.5306451786169109,
+ "eval_pred_class_0": 17143,
+ "eval_pred_class_1": 2525,
+ "eval_predicted_binding_ratio": 0.12838112670327437,
+ "eval_recall": 0.17188003869719445,
+ "eval_recall_macro": 0.5258205046507038,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.973,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 728
+ },
+ {
+ "epoch": 29.0,
+ "eval_accuracy": 0.7986068741102298,
+ "eval_auc": 0.5437619187831915,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.18076525336091004,
+ "eval_f1_macro": 0.5329784934669249,
+ "eval_loss": 0.5149086117744446,
+ "eval_pr_auc": 0.16564494894795073,
+ "eval_precision": 0.2520184544405998,
+ "eval_precision_macro": 0.5517368953367268,
+ "eval_pred_class_0": 17934,
+ "eval_pred_class_1": 1734,
+ "eval_predicted_binding_ratio": 0.08816351433801098,
+ "eval_recall": 0.14092228313447275,
+ "eval_recall_macro": 0.5313170599592205,
+ "eval_runtime": 0.258,
+ "eval_samples_per_second": 631.786,
+ "eval_steps_per_second": 3.876,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 754
+ },
+ {
+ "epoch": 30.0,
+ "eval_accuracy": 0.8241814114297336,
+ "eval_auc": 0.5629712926123112,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17548879351454458,
+ "eval_f1_macro": 0.5385440097559633,
+ "eval_loss": 0.4897482395172119,
+ "eval_pr_auc": 0.17432371223202417,
+ "eval_precision": 0.3366880146386093,
+ "eval_precision_macro": 0.5947773855158054,
+ "eval_pred_class_0": 18575,
+ "eval_pred_class_1": 1093,
+ "eval_predicted_binding_ratio": 0.05557250355908074,
+ "eval_recall": 0.11867139632376653,
+ "eval_recall_macro": 0.5374548506940254,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.589,
+ "eval_steps_per_second": 3.899,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 780
+ },
+ {
+ "epoch": 31.0,
+ "eval_accuracy": 0.8372991661582265,
+ "eval_auc": 0.5843234707368185,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16883116883116883,
+ "eval_f1_macro": 0.5393273806169032,
+ "eval_loss": 0.46770623326301575,
+ "eval_pr_auc": 0.18537280435724188,
+ "eval_precision": 0.43391188251001334,
+ "eval_precision_macro": 0.6435905413924347,
+ "eval_pred_class_0": 18919,
+ "eval_pred_class_1": 749,
+ "eval_predicted_binding_ratio": 0.038082163921090095,
+ "eval_recall": 0.10480490164463076,
+ "eval_recall_macro": 0.5396059276135269,
+ "eval_runtime": 0.262,
+ "eval_samples_per_second": 622.108,
+ "eval_steps_per_second": 3.817,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 806
+ },
+ {
+ "epoch": 32.0,
+ "eval_accuracy": 0.8421293471629042,
+ "eval_auc": 0.6080693861773249,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16012983500135244,
+ "eval_f1_macro": 0.5365030891665479,
+ "eval_loss": 0.44841739535331726,
+ "eval_pr_auc": 0.1997259509611161,
+ "eval_precision": 0.4966442953020134,
+ "eval_precision_macro": 0.6747850251677853,
+ "eval_pred_class_0": 19072,
+ "eval_pred_class_1": 596,
+ "eval_predicted_binding_ratio": 0.030303030303030304,
+ "eval_recall": 0.09545307965172525,
+ "eval_recall_macro": 0.5386723960460594,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.915,
+ "eval_steps_per_second": 3.852,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 832
+ },
+ {
+ "epoch": 33.0,
+ "eval_accuracy": 0.8450782997762863,
+ "eval_auc": 0.6341019717128032,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.16497670594683475,
+ "eval_f1_macro": 0.5397977373430758,
+ "eval_loss": 0.4312308728694916,
+ "eval_pr_auc": 0.2181951536640109,
+ "eval_precision": 0.5492700729927007,
+ "eval_precision_macro": 0.7014132791741746,
+ "eval_pred_class_0": 19120,
+ "eval_pred_class_1": 548,
+ "eval_predicted_binding_ratio": 0.0278625177954037,
+ "eval_recall": 0.09706546275395034,
+ "eval_recall_macro": 0.5410781529982704,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.04,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 858
+ },
+ {
+ "epoch": 34.0,
+ "eval_accuracy": 0.8478238763473663,
+ "eval_auc": 0.6614343616815009,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.17932547299149987,
+ "eval_f1_macro": 0.5477310488609045,
+ "eval_loss": 0.41584137082099915,
+ "eval_pr_auc": 0.24110190314317137,
+ "eval_precision": 0.5989010989010989,
+ "eval_precision_macro": 0.7269162957114008,
+ "eval_pred_class_0": 19122,
+ "eval_pred_class_1": 546,
+ "eval_predicted_binding_ratio": 0.027760829774252593,
+ "eval_recall": 0.1054498548855208,
+ "eval_recall_macro": 0.546115402483504,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.016,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 884
+ },
+ {
+ "epoch": 35.0,
+ "eval_accuracy": 0.8509253609924751,
+ "eval_auc": 0.6891114086357669,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.20585048754062837,
+ "eval_f1_macro": 0.5617963020129356,
+ "eval_loss": 0.4015716016292572,
+ "eval_pr_auc": 0.2683830744239665,
+ "eval_precision": 0.6429780033840947,
+ "eval_precision_macro": 0.7501727569994856,
+ "eval_pred_class_0": 19077,
+ "eval_pred_class_1": 591,
+ "eval_predicted_binding_ratio": 0.030048810250152533,
+ "eval_recall": 0.12254111576910674,
+ "eval_recall_macro": 0.5549024767594251,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.872,
+ "eval_steps_per_second": 4.005,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 910
+ },
+ {
+ "epoch": 36.0,
+ "eval_accuracy": 0.8537217815741306,
+ "eval_auc": 0.7168296727231165,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2366675510745556,
+ "eval_f1_macro": 0.5778889812054533,
+ "eval_loss": 0.3880736827850342,
+ "eval_pr_auc": 0.2994175694348318,
+ "eval_precision": 0.6676646706586826,
+ "eval_precision_macro": 0.7639639142767097,
+ "eval_pred_class_0": 19000,
+ "eval_pred_class_1": 668,
+ "eval_predicted_binding_ratio": 0.0339637990644702,
+ "eval_recall": 0.1438245727184779,
+ "eval_recall_macro": 0.5652122199621845,
+ "eval_runtime": 0.2693,
+ "eval_samples_per_second": 605.227,
+ "eval_steps_per_second": 3.713,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 936
+ },
+ {
+ "epoch": 37.0,
+ "eval_accuracy": 0.8565690461663616,
+ "eval_auc": 0.743181046261935,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.2768520892078954,
+ "eval_f1_macro": 0.59862076733571,
+ "eval_loss": 0.37574923038482666,
+ "eval_pr_auc": 0.33279138215623166,
+ "eval_precision": 0.675,
+ "eval_precision_macro": 0.7696337714649142,
+ "eval_pred_class_0": 18868,
+ "eval_pred_class_1": 800,
+ "eval_predicted_binding_ratio": 0.04067520846044336,
+ "eval_recall": 0.17413737504030957,
+ "eval_recall_macro": 0.5792217629109919,
+ "eval_runtime": 0.2774,
+ "eval_samples_per_second": 587.664,
+ "eval_steps_per_second": 3.605,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 962
+ },
+ {
+ "epoch": 38.0,
+ "eval_accuracy": 0.8596705308114704,
+ "eval_auc": 0.7678773986205973,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3205317577548006,
+ "eval_f1_macro": 0.6211435791665652,
+ "eval_loss": 0.36427780985832214,
+ "eval_pr_auc": 0.3671211589285648,
+ "eval_precision": 0.6774193548387096,
+ "eval_precision_macro": 0.7732261685723991,
+ "eval_pred_class_0": 18707,
+ "eval_pred_class_1": 961,
+ "eval_predicted_binding_ratio": 0.048861094163107584,
+ "eval_recall": 0.20993227990970656,
+ "eval_recall_macro": 0.5956101913823899,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.252,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 988
+ },
+ {
+ "epoch": 38.46153846153846,
+ "grad_norm": 35024.03515625,
+ "learning_rate": 7.684615384615384e-07,
+ "loss": 0.5725,
+ "step": 1000
+ },
+ {
+ "epoch": 39.0,
+ "eval_accuracy": 0.8642464917632703,
+ "eval_auc": 0.7904617013805764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.3723554301833568,
+ "eval_f1_macro": 0.6481240123381296,
+ "eval_loss": 0.35397008061408997,
+ "eval_pr_auc": 0.40223746916130343,
+ "eval_precision": 0.6869037294015612,
+ "eval_precision_macro": 0.7810970172797707,
+ "eval_pred_class_0": 18515,
+ "eval_pred_class_1": 1153,
+ "eval_predicted_binding_ratio": 0.058623144193613995,
+ "eval_recall": 0.25540148339245405,
+ "eval_recall_macro": 0.6168055886811972,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.77,
+ "eval_steps_per_second": 3.888,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1014
+ },
+ {
+ "epoch": 40.0,
+ "eval_accuracy": 0.867246288387228,
+ "eval_auc": 0.8102996097248453,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4125984251968504,
+ "eval_f1_macro": 0.6688826868467987,
+ "eval_loss": 0.3446972072124481,
+ "eval_pr_auc": 0.43559149314237056,
+ "eval_precision": 0.6822916666666666,
+ "eval_precision_macro": 0.7815518582187295,
+ "eval_pred_class_0": 18324,
+ "eval_pred_class_1": 1344,
+ "eval_predicted_binding_ratio": 0.06833435021354485,
+ "eval_recall": 0.29571106094808125,
+ "eval_recall_macro": 0.634968465827454,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.748,
+ "eval_steps_per_second": 3.949,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1040
+ },
+ {
+ "epoch": 41.0,
+ "eval_accuracy": 0.8715171852755745,
+ "eval_auc": 0.8272611461298317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4583065380493033,
+ "eval_f1_macro": 0.6927107089600444,
+ "eval_loss": 0.33654505014419556,
+ "eval_pr_auc": 0.4645782536288223,
+ "eval_precision": 0.6835038363171355,
+ "eval_precision_macro": 0.7856317237263981,
+ "eval_pred_class_0": 18104,
+ "eval_pred_class_1": 1564,
+ "eval_predicted_binding_ratio": 0.07952003254016676,
+ "eval_recall": 0.344727507255724,
+ "eval_recall_macro": 0.6574244163911867,
+ "eval_runtime": 0.2602,
+ "eval_samples_per_second": 626.484,
+ "eval_steps_per_second": 3.843,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1066
+ },
+ {
+ "epoch": 42.0,
+ "eval_accuracy": 0.8743136058572301,
+ "eval_auc": 0.8416796876148132,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.4898885678910442,
+ "eval_f1_macro": 0.7091078096051335,
+ "eval_loss": 0.329649955034256,
+ "eval_pr_auc": 0.49067495219464874,
+ "eval_precision": 0.6802292263610316,
+ "eval_precision_macro": 0.7867195342316791,
+ "eval_pred_class_0": 17923,
+ "eval_pred_class_1": 1745,
+ "eval_predicted_binding_ratio": 0.08872279845434208,
+ "eval_recall": 0.38277974846823604,
+ "eval_recall_macro": 0.674549166803684,
+ "eval_runtime": 0.2641,
+ "eval_samples_per_second": 617.276,
+ "eval_steps_per_second": 3.787,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1092
+ },
+ {
+ "epoch": 43.0,
+ "eval_accuracy": 0.8758897701850722,
+ "eval_auc": 0.8534597097025247,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5176842521240862,
+ "eval_f1_macro": 0.7232330815689724,
+ "eval_loss": 0.32387641072273254,
+ "eval_pr_auc": 0.5115876936649595,
+ "eval_precision": 0.6683673469387755,
+ "eval_precision_macro": 0.7836133097919538,
+ "eval_pred_class_0": 17708,
+ "eval_pred_class_1": 1960,
+ "eval_predicted_binding_ratio": 0.09965426072808622,
+ "eval_recall": 0.42244437278297325,
+ "eval_recall_macro": 0.6916048748685797,
+ "eval_runtime": 0.2631,
+ "eval_samples_per_second": 619.429,
+ "eval_steps_per_second": 3.8,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1118
+ },
+ {
+ "epoch": 44.0,
+ "eval_accuracy": 0.878991254830181,
+ "eval_auc": 0.863260959032272,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5468392993145469,
+ "eval_f1_macro": 0.7385059071387897,
+ "eval_loss": 0.3189404308795929,
+ "eval_pr_auc": 0.5291286431025274,
+ "eval_precision": 0.6675964667596467,
+ "eval_precision_macro": 0.7862729722049646,
+ "eval_pred_class_0": 17517,
+ "eval_pred_class_1": 2151,
+ "eval_predicted_binding_ratio": 0.10936546674801709,
+ "eval_recall": 0.4630764269590455,
+ "eval_recall_macro": 0.7099591708043251,
+ "eval_runtime": 0.2583,
+ "eval_samples_per_second": 630.993,
+ "eval_steps_per_second": 3.871,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1144
+ },
+ {
+ "epoch": 45.0,
+ "eval_accuracy": 0.8797030709782387,
+ "eval_auc": 0.8710211865407248,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5603864734299517,
+ "eval_f1_macro": 0.7453519808982827,
+ "eval_loss": 0.3149340748786926,
+ "eval_pr_auc": 0.5420378923897758,
+ "eval_precision": 0.6611135466900482,
+ "eval_precision_macro": 0.7847466853482449,
+ "eval_pred_class_0": 17387,
+ "eval_pred_class_1": 2281,
+ "eval_predicted_binding_ratio": 0.11597518812283913,
+ "eval_recall": 0.48629474363108677,
+ "eval_recall_macro": 0.719817861342917,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.753,
+ "eval_steps_per_second": 3.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1170
+ },
+ {
+ "epoch": 46.0,
+ "eval_accuracy": 0.8811775472849298,
+ "eval_auc": 0.8772876506442417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5748590140076405,
+ "eval_f1_macro": 0.7528983447354317,
+ "eval_loss": 0.3115498721599579,
+ "eval_pr_auc": 0.5526462799402374,
+ "eval_precision": 0.659432387312187,
+ "eval_precision_macro": 0.7856853923591968,
+ "eval_pred_class_0": 17272,
+ "eval_pred_class_1": 2396,
+ "eval_predicted_binding_ratio": 0.12182224933902787,
+ "eval_recall": 0.509513060303128,
+ "eval_recall_macro": 0.7301292590704993,
+ "eval_runtime": 0.247,
+ "eval_samples_per_second": 659.808,
+ "eval_steps_per_second": 4.048,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1196
+ },
+ {
+ "epoch": 47.0,
+ "eval_accuracy": 0.8817368314012609,
+ "eval_auc": 0.8824923380415335,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5840486409155937,
+ "eval_f1_macro": 0.7575589340187262,
+ "eval_loss": 0.3087034523487091,
+ "eval_pr_auc": 0.5616002050007283,
+ "eval_precision": 0.6555600160578081,
+ "eval_precision_macro": 0.7850484483851945,
+ "eval_pred_class_0": 17177,
+ "eval_pred_class_1": 2491,
+ "eval_predicted_binding_ratio": 0.12665243034370552,
+ "eval_recall": 0.526604321186714,
+ "eval_recall_macro": 0.7374073093831197,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.602,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1222
+ },
+ {
+ "epoch": 48.0,
+ "eval_accuracy": 0.8833638397396787,
+ "eval_auc": 0.8867719903429474,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.5951288386869044,
+ "eval_f1_macro": 0.7634984852775182,
+ "eval_loss": 0.30617523193359375,
+ "eval_pr_auc": 0.5687331552143856,
+ "eval_precision": 0.6573099415204678,
+ "eval_precision_macro": 0.7872879591248483,
+ "eval_pred_class_0": 17103,
+ "eval_pred_class_1": 2565,
+ "eval_predicted_binding_ratio": 0.13041488712629654,
+ "eval_recall": 0.5436955820702999,
+ "eval_recall_macro": 0.7453191497603264,
+ "eval_runtime": 0.2573,
+ "eval_samples_per_second": 633.489,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1248
+ },
+ {
+ "epoch": 49.0,
+ "eval_accuracy": 0.8840248118771609,
+ "eval_auc": 0.8901050792607902,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6022667829119442,
+ "eval_f1_macro": 0.7671909492667516,
+ "eval_loss": 0.3041446805000305,
+ "eval_pr_auc": 0.5742293420515451,
+ "eval_precision": 0.6556567957479119,
+ "eval_precision_macro": 0.7874972953730754,
+ "eval_pred_class_0": 17034,
+ "eval_pred_class_1": 2634,
+ "eval_predicted_binding_ratio": 0.13392312385600977,
+ "eval_recall": 0.5569171235085456,
+ "eval_recall_macro": 0.751084867060001,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 644.934,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1274
+ },
+ {
+ "epoch": 50.0,
+ "eval_accuracy": 0.8846349400040675,
+ "eval_auc": 0.8931467576948593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6055970797844603,
+ "eval_f1_macro": 0.7690165668701654,
+ "eval_loss": 0.30221912264823914,
+ "eval_pr_auc": 0.5797467982851593,
+ "eval_precision": 0.6568627450980392,
+ "eval_precision_macro": 0.7884983683177079,
+ "eval_pred_class_0": 17016,
+ "eval_pred_class_1": 2652,
+ "eval_predicted_binding_ratio": 0.13483831604636973,
+ "eval_recall": 0.561754272815221,
+ "eval_recall_macro": 0.7534129002755408,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.805,
+ "eval_steps_per_second": 3.993,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1300
+ },
+ {
+ "epoch": 51.0,
+ "eval_accuracy": 0.8854484441732764,
+ "eval_auc": 0.8956789398085232,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6095997227516895,
+ "eval_f1_macro": 0.7712381155096151,
+ "eval_loss": 0.30062082409858704,
+ "eval_pr_auc": 0.5844826815319759,
+ "eval_precision": 0.6588014981273408,
+ "eval_precision_macro": 0.7899255166833903,
+ "eval_pred_class_0": 16998,
+ "eval_pred_class_1": 2670,
+ "eval_predicted_binding_ratio": 0.13575350823672971,
+ "eval_recall": 0.5672363753627861,
+ "eval_recall_macro": 0.7561237710700572,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.92,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1326
+ },
+ {
+ "epoch": 52.0,
+ "eval_accuracy": 0.8860077282896075,
+ "eval_auc": 0.8977014114868052,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6157010627356874,
+ "eval_f1_macro": 0.774389842453749,
+ "eval_loss": 0.2989857792854309,
+ "eval_pr_auc": 0.5879586440077966,
+ "eval_precision": 0.6571533113794366,
+ "eval_precision_macro": 0.7900469834133675,
+ "eval_pred_class_0": 16935,
+ "eval_pred_class_1": 2733,
+ "eval_predicted_binding_ratio": 0.13895668090298963,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.7613048960873738,
+ "eval_runtime": 0.2404,
+ "eval_samples_per_second": 677.963,
+ "eval_steps_per_second": 4.159,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1352
+ },
+ {
+ "epoch": 53.0,
+ "eval_accuracy": 0.8864144803742119,
+ "eval_auc": 0.8993954307902827,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6165465156196361,
+ "eval_f1_macro": 0.7749399244764847,
+ "eval_loss": 0.29775407910346985,
+ "eval_pr_auc": 0.5914083972949268,
+ "eval_precision": 0.6590825688073394,
+ "eval_precision_macro": 0.7910298047365505,
+ "eval_pred_class_0": 16943,
+ "eval_pred_class_1": 2725,
+ "eval_predicted_binding_ratio": 0.13854992881838518,
+ "eval_recall": 0.5791680103192518,
+ "eval_recall_macro": 0.7615463399215019,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 639.94,
+ "eval_steps_per_second": 3.926,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1378
+ },
+ {
+ "epoch": 54.0,
+ "eval_accuracy": 0.8866178564165141,
+ "eval_auc": 0.9007296980023092,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6186730506155951,
+ "eval_f1_macro": 0.7760410164688105,
+ "eval_loss": 0.296587198972702,
+ "eval_pr_auc": 0.59415963293408,
+ "eval_precision": 0.6585365853658537,
+ "eval_precision_macro": 0.7910908800004612,
+ "eval_pred_class_0": 16921,
+ "eval_pred_class_1": 2747,
+ "eval_predicted_binding_ratio": 0.1396684970510474,
+ "eval_recall": 0.583360206385037,
+ "eval_recall_macro": 0.7633708136410005,
+ "eval_runtime": 0.2605,
+ "eval_samples_per_second": 625.694,
+ "eval_steps_per_second": 3.839,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1404
+ },
+ {
+ "epoch": 55.0,
+ "eval_accuracy": 0.8872279845434208,
+ "eval_auc": 0.9019074471661075,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6215017064846416,
+ "eval_f1_macro": 0.7776226419864958,
+ "eval_loss": 0.2955063581466675,
+ "eval_pr_auc": 0.5967231989416606,
+ "eval_precision": 0.6600217470097861,
+ "eval_precision_macro": 0.7921612076464745,
+ "eval_pred_class_0": 16909,
+ "eval_pred_class_1": 2759,
+ "eval_predicted_binding_ratio": 0.14027862517795403,
+ "eval_recall": 0.5872299258303773,
+ "eval_recall_macro": 0.7653056733636705,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.741,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1430
+ },
+ {
+ "epoch": 56.0,
+ "eval_accuracy": 0.8881431767337807,
+ "eval_auc": 0.9030401348597343,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6235455167693361,
+ "eval_f1_macro": 0.778929094226063,
+ "eval_loss": 0.29450690746307373,
+ "eval_pr_auc": 0.5995879576354929,
+ "eval_precision": 0.6642362376959533,
+ "eval_precision_macro": 0.7943337761596458,
+ "eval_pred_class_0": 16925,
+ "eval_pred_class_1": 2743,
+ "eval_predicted_binding_ratio": 0.13946512100874517,
+ "eval_recall": 0.5875524024508223,
+ "eval_recall_macro": 0.7659799798214153,
+ "eval_runtime": 0.2658,
+ "eval_samples_per_second": 613.135,
+ "eval_steps_per_second": 3.762,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1456
+ },
+ {
+ "epoch": 57.0,
+ "eval_accuracy": 0.8882957087655075,
+ "eval_auc": 0.9040587382005859,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6243802359377671,
+ "eval_f1_macro": 0.7793863433697854,
+ "eval_loss": 0.2936408519744873,
+ "eval_pr_auc": 0.6024898616264603,
+ "eval_precision": 0.6644832605531296,
+ "eval_precision_macro": 0.7945643253120258,
+ "eval_pred_class_0": 16920,
+ "eval_pred_class_1": 2748,
+ "eval_predicted_binding_ratio": 0.13971934106162295,
+ "eval_recall": 0.5888423089326024,
+ "eval_recall_macro": 0.7665947525830392,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.369,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1482
+ },
+ {
+ "epoch": 57.69230769230769,
+ "grad_norm": 15613.5302734375,
+ "learning_rate": 9.992863736980368e-07,
+ "loss": 0.3115,
+ "step": 1500
+ },
+ {
+ "epoch": 58.0,
+ "eval_accuracy": 0.887888956680903,
+ "eval_auc": 0.9048886089216611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6246808510638298,
+ "eval_f1_macro": 0.7793916194591735,
+ "eval_loss": 0.29281434416770935,
+ "eval_pr_auc": 0.603713292882509,
+ "eval_precision": 0.6614996395097332,
+ "eval_precision_macro": 0.7932808958765667,
+ "eval_pred_class_0": 16894,
+ "eval_pred_class_1": 2774,
+ "eval_predicted_binding_ratio": 0.14104128533658736,
+ "eval_recall": 0.5917445985166075,
+ "eval_recall_macro": 0.7675328292275196,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.428,
+ "eval_steps_per_second": 3.972,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1508
+ },
+ {
+ "epoch": 59.0,
+ "eval_accuracy": 0.887888956680903,
+ "eval_auc": 0.9056672866982218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6251912289648138,
+ "eval_f1_macro": 0.7796389289833485,
+ "eval_loss": 0.2920655906200409,
+ "eval_pr_auc": 0.6054694565410179,
+ "eval_precision": 0.6610352264557872,
+ "eval_precision_macro": 0.7931493791878604,
+ "eval_pred_class_0": 16886,
+ "eval_pred_class_1": 2782,
+ "eval_predicted_binding_ratio": 0.14144803742119177,
+ "eval_recall": 0.5930345049983876,
+ "eval_recall_macro": 0.7680570605513457,
+ "eval_runtime": 0.235,
+ "eval_samples_per_second": 693.644,
+ "eval_steps_per_second": 4.255,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1534
+ },
+ {
+ "epoch": 60.0,
+ "eval_accuracy": 0.8882957087655075,
+ "eval_auc": 0.9063294664622661,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6254049445865303,
+ "eval_f1_macro": 0.7798830166450921,
+ "eval_loss": 0.29136955738067627,
+ "eval_pr_auc": 0.6071731602747702,
+ "eval_precision": 0.6635311143270622,
+ "eval_precision_macro": 0.7942892202018652,
+ "eval_pred_class_0": 16904,
+ "eval_pred_class_1": 2764,
+ "eval_predicted_binding_ratio": 0.14053284523083182,
+ "eval_recall": 0.5914221218961625,
+ "eval_recall_macro": 0.7676432152306912,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.702,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1560
+ },
+ {
+ "epoch": 61.0,
+ "eval_accuracy": 0.8886007728289608,
+ "eval_auc": 0.9070085321120007,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6269368295589988,
+ "eval_f1_macro": 0.7807307642400976,
+ "eval_loss": 0.290680855512619,
+ "eval_pr_auc": 0.6088679721523397,
+ "eval_precision": 0.6641414141414141,
+ "eval_precision_macro": 0.7947837752525253,
+ "eval_pred_class_0": 16896,
+ "eval_pred_class_1": 2772,
+ "eval_predicted_binding_ratio": 0.14093959731543623,
+ "eval_recall": 0.5936794582392777,
+ "eval_recall_macro": 0.7687417029229828,
+ "eval_runtime": 0.2486,
+ "eval_samples_per_second": 655.642,
+ "eval_steps_per_second": 4.022,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1586
+ },
+ {
+ "epoch": 62.0,
+ "eval_accuracy": 0.8887533048606874,
+ "eval_auc": 0.9076136113046634,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6273841961852861,
+ "eval_f1_macro": 0.7810002501366307,
+ "eval_loss": 0.29004454612731934,
+ "eval_pr_auc": 0.6100991712198425,
+ "eval_precision": 0.664741970407795,
+ "eval_precision_macro": 0.7951158511564335,
+ "eval_pred_class_0": 16897,
+ "eval_pred_class_1": 2771,
+ "eval_predicted_binding_ratio": 0.1408887533048607,
+ "eval_recall": 0.5940019348597226,
+ "eval_recall_macro": 0.7689633021917374,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.871,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1612
+ },
+ {
+ "epoch": 63.0,
+ "eval_accuracy": 0.8888041488712629,
+ "eval_auc": 0.9081136281710841,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6286296484971982,
+ "eval_f1_macro": 0.781621309135136,
+ "eval_loss": 0.28950682282447815,
+ "eval_pr_auc": 0.6111782063777282,
+ "eval_precision": 0.6639167862266858,
+ "eval_precision_macro": 0.794932326762632,
+ "eval_pred_class_0": 16880,
+ "eval_pred_class_1": 2788,
+ "eval_predicted_binding_ratio": 0.14175310148464512,
+ "eval_recall": 0.5969042244437278,
+ "eval_recall_macro": 0.7701730031496119,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.907,
+ "eval_steps_per_second": 4.018,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1638
+ },
+ {
+ "epoch": 64.0,
+ "eval_accuracy": 0.889363432987594,
+ "eval_auc": 0.9086206913667498,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6299319727891156,
+ "eval_f1_macro": 0.7824456611913058,
+ "eval_loss": 0.2888965606689453,
+ "eval_pr_auc": 0.6126297306007413,
+ "eval_precision": 0.6664267722202231,
+ "eval_precision_macro": 0.7962366556938643,
+ "eval_pred_class_0": 16889,
+ "eval_pred_class_1": 2779,
+ "eval_predicted_binding_ratio": 0.14129550538946511,
+ "eval_recall": 0.5972267010641729,
+ "eval_recall_macro": 0.7706360462524945,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.362,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1664
+ },
+ {
+ "epoch": 65.0,
+ "eval_accuracy": 0.8900244051250763,
+ "eval_auc": 0.9091278518874051,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6305721605465414,
+ "eval_f1_macro": 0.782984177701663,
+ "eval_loss": 0.2884848117828369,
+ "eval_pr_auc": 0.6142560104629078,
+ "eval_precision": 0.6702977487291213,
+ "eval_precision_macro": 0.7980494301171916,
+ "eval_pred_class_0": 16914,
+ "eval_pred_class_1": 2754,
+ "eval_predicted_binding_ratio": 0.14002440512507627,
+ "eval_recall": 0.5952918413415027,
+ "eval_recall_macro": 0.7702420454972136,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 614.992,
+ "eval_steps_per_second": 3.773,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1690
+ },
+ {
+ "epoch": 66.0,
+ "eval_accuracy": 0.8905836892414074,
+ "eval_auc": 0.9094545718773954,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6327645051194539,
+ "eval_f1_macro": 0.78423982216183,
+ "eval_loss": 0.2880232632160187,
+ "eval_pr_auc": 0.6147358252333397,
+ "eval_precision": 0.6719826023921711,
+ "eval_precision_macro": 0.7991174470355793,
+ "eval_pred_class_0": 16909,
+ "eval_pred_class_1": 2759,
+ "eval_predicted_binding_ratio": 0.14027862517795403,
+ "eval_recall": 0.5978716543050628,
+ "eval_recall_macro": 0.7716224934167917,
+ "eval_runtime": 0.2505,
+ "eval_samples_per_second": 650.665,
+ "eval_steps_per_second": 3.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1716
+ },
+ {
+ "epoch": 67.0,
+ "eval_accuracy": 0.8908887533048607,
+ "eval_auc": 0.9099424231201196,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6360244233378561,
+ "eval_f1_macro": 0.7859248910947654,
+ "eval_loss": 0.28752708435058594,
+ "eval_pr_auc": 0.6159928290925853,
+ "eval_precision": 0.6708407871198568,
+ "eval_precision_macro": 0.7990901618287602,
+ "eval_pred_class_0": 16873,
+ "eval_pred_class_1": 2795,
+ "eval_predicted_binding_ratio": 0.14210900955867398,
+ "eval_recall": 0.6046436633344082,
+ "eval_recall_macro": 0.7745557907424743,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.527,
+ "eval_steps_per_second": 4.046,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1742
+ },
+ {
+ "epoch": 68.0,
+ "eval_accuracy": 0.8910412853365873,
+ "eval_auc": 0.910346516476819,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6362247496180614,
+ "eval_f1_macro": 0.7860747010162366,
+ "eval_loss": 0.2870826721191406,
+ "eval_pr_auc": 0.6168347475575285,
+ "eval_precision": 0.6716845878136201,
+ "eval_precision_macro": 0.7994932004123201,
+ "eval_pred_class_0": 16878,
+ "eval_pred_class_1": 2790,
+ "eval_predicted_binding_ratio": 0.14185478950579622,
+ "eval_recall": 0.6043211867139633,
+ "eval_recall_macro": 0.7745152743493158,
+ "eval_runtime": 0.266,
+ "eval_samples_per_second": 612.866,
+ "eval_steps_per_second": 3.76,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1768
+ },
+ {
+ "epoch": 69.0,
+ "eval_accuracy": 0.8910921293471629,
+ "eval_auc": 0.9107640601470772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6352179836512262,
+ "eval_f1_macro": 0.7856044496310159,
+ "eval_loss": 0.28665614128112793,
+ "eval_pr_auc": 0.6181373929491851,
+ "eval_precision": 0.673042223024179,
+ "eval_precision_macro": 0.7999465716529429,
+ "eval_pred_class_0": 16897,
+ "eval_pred_class_1": 2771,
+ "eval_predicted_binding_ratio": 0.1408887533048607,
+ "eval_recall": 0.6014188971299581,
+ "eval_recall_macro": 0.7733659343499732,
+ "eval_runtime": 0.2549,
+ "eval_samples_per_second": 639.359,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1794
+ },
+ {
+ "epoch": 70.0,
+ "eval_accuracy": 0.8910412853365873,
+ "eval_auc": 0.9111344401273891,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6354822248681748,
+ "eval_f1_macro": 0.7857149295725039,
+ "eval_loss": 0.28624698519706726,
+ "eval_pr_auc": 0.6190938884927122,
+ "eval_precision": 0.6724262059035278,
+ "eval_precision_macro": 0.7997122148522967,
+ "eval_pred_class_0": 16890,
+ "eval_pred_class_1": 2778,
+ "eval_predicted_binding_ratio": 0.14124466137888958,
+ "eval_recall": 0.6023863269912931,
+ "eval_recall_macro": 0.7737289273635768,
+ "eval_runtime": 0.2673,
+ "eval_samples_per_second": 609.866,
+ "eval_steps_per_second": 3.742,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1820
+ },
+ {
+ "epoch": 71.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9114138601724477,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.63642541624193,
+ "eval_f1_macro": 0.7862246662674524,
+ "eval_loss": 0.2858646512031555,
+ "eval_pr_auc": 0.6197061363545492,
+ "eval_precision": 0.6725314183123878,
+ "eval_precision_macro": 0.7998977650704271,
+ "eval_pred_class_0": 16883,
+ "eval_pred_class_1": 2785,
+ "eval_predicted_binding_ratio": 0.14160056945291843,
+ "eval_recall": 0.6039987100935182,
+ "eval_recall_macro": 0.7744747579561573,
+ "eval_runtime": 0.2386,
+ "eval_samples_per_second": 683.037,
+ "eval_steps_per_second": 4.19,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1846
+ },
+ {
+ "epoch": 72.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9118766988928523,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6373514431239389,
+ "eval_f1_macro": 0.7867436519572335,
+ "eval_loss": 0.2853938341140747,
+ "eval_pr_auc": 0.6212208808374569,
+ "eval_precision": 0.6730010756543564,
+ "eval_precision_macro": 0.8002424656665053,
+ "eval_pred_class_0": 16879,
+ "eval_pred_class_1": 2789,
+ "eval_predicted_binding_ratio": 0.14180394549522066,
+ "eval_recall": 0.6052886165752983,
+ "eval_recall_macro": 0.7751197111970474,
+ "eval_runtime": 0.249,
+ "eval_samples_per_second": 654.683,
+ "eval_steps_per_second": 4.016,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1872
+ },
+ {
+ "epoch": 73.0,
+ "eval_accuracy": 0.891193817368314,
+ "eval_auc": 0.9121406831945651,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.637411047102677,
+ "eval_f1_macro": 0.7867021736679862,
+ "eval_loss": 0.2850610911846161,
+ "eval_pr_auc": 0.6219405042066507,
+ "eval_precision": 0.6715458764726884,
+ "eval_precision_macro": 0.79960764506032,
+ "eval_pred_class_0": 16867,
+ "eval_pred_class_1": 2801,
+ "eval_predicted_binding_ratio": 0.14241407362212732,
+ "eval_recall": 0.6065785230570784,
+ "eval_recall_macro": 0.7755232206038093,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 618.95,
+ "eval_steps_per_second": 3.797,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1898
+ },
+ {
+ "epoch": 74.0,
+ "eval_accuracy": 0.8912446613788896,
+ "eval_auc": 0.9124869655074592,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.637026981164093,
+ "eval_f1_macro": 0.7865337040796394,
+ "eval_loss": 0.284681111574173,
+ "eval_pr_auc": 0.6229948438184316,
+ "eval_precision": 0.6722779369627507,
+ "eval_precision_macro": 0.7998744508231626,
+ "eval_pred_class_0": 16876,
+ "eval_pred_class_1": 2792,
+ "eval_predicted_binding_ratio": 0.14195647752694732,
+ "eval_recall": 0.6052886165752983,
+ "eval_recall_macro": 0.7750291697592493,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.745,
+ "eval_steps_per_second": 3.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1924
+ },
+ {
+ "epoch": 75.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9128360118500571,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6385786802030456,
+ "eval_f1_macro": 0.7873381643700563,
+ "eval_loss": 0.2842992842197418,
+ "eval_pr_auc": 0.6238239183047751,
+ "eval_precision": 0.6717693129227483,
+ "eval_precision_macro": 0.7998801484834395,
+ "eval_pred_class_0": 16859,
+ "eval_pred_class_1": 2809,
+ "eval_predicted_binding_ratio": 0.14282082570673174,
+ "eval_recall": 0.6085133827797484,
+ "eval_recall_macro": 0.7764302895066123,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.282,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1950
+ },
+ {
+ "epoch": 76.0,
+ "eval_accuracy": 0.8913971934106162,
+ "eval_auc": 0.9131794425407568,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.638700947225981,
+ "eval_f1_macro": 0.7873973860112672,
+ "eval_loss": 0.2839708924293518,
+ "eval_pr_auc": 0.6248797725776689,
+ "eval_precision": 0.671647100675916,
+ "eval_precision_macro": 0.7998444318708524,
+ "eval_pred_class_0": 16857,
+ "eval_pred_class_1": 2811,
+ "eval_predicted_binding_ratio": 0.14292251372788287,
+ "eval_recall": 0.6088358594001935,
+ "eval_recall_macro": 0.7765613473375688,
+ "eval_runtime": 0.2644,
+ "eval_samples_per_second": 616.603,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 1976
+ },
+ {
+ "epoch": 76.92307692307692,
+ "grad_norm": 18483.060546875,
+ "learning_rate": 9.912189372587507e-07,
+ "loss": 0.2796,
+ "step": 2000
+ },
+ {
+ "epoch": 77.0,
+ "eval_accuracy": 0.891651413463494,
+ "eval_auc": 0.9134005357195656,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6389971201084195,
+ "eval_f1_macro": 0.7876288504858192,
+ "eval_loss": 0.28368592262268066,
+ "eval_pr_auc": 0.6256253637409228,
+ "eval_precision": 0.6730906495360457,
+ "eval_precision_macro": 0.8005261145225586,
+ "eval_pred_class_0": 16866,
+ "eval_pred_class_1": 2802,
+ "eval_predicted_binding_ratio": 0.14246491763270286,
+ "eval_recall": 0.6081909061593035,
+ "eval_recall_macro": 0.7764501340719858,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.408,
+ "eval_steps_per_second": 3.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2002
+ },
+ {
+ "epoch": 78.0,
+ "eval_accuracy": 0.8918547895057962,
+ "eval_auc": 0.9135784263355038,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6399187404773997,
+ "eval_f1_macro": 0.788145675542478,
+ "eval_loss": 0.283357173204422,
+ "eval_pr_auc": 0.6259773419133142,
+ "eval_precision": 0.6735566642908054,
+ "eval_precision_macro": 0.8008691873227245,
+ "eval_pred_class_0": 16862,
+ "eval_pred_class_1": 2806,
+ "eval_predicted_binding_ratio": 0.14266829367500508,
+ "eval_recall": 0.6094808126410836,
+ "eval_recall_macro": 0.7770950873128759,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.804,
+ "eval_steps_per_second": 4.152,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2028
+ },
+ {
+ "epoch": 79.0,
+ "eval_accuracy": 0.8920073215375229,
+ "eval_auc": 0.9139080660751812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6396335256192739,
+ "eval_f1_macro": 0.788060288914535,
+ "eval_loss": 0.28298139572143555,
+ "eval_pr_auc": 0.6270064177031266,
+ "eval_precision": 0.6749015395631937,
+ "eval_precision_macro": 0.8014211401519672,
+ "eval_pred_class_0": 16875,
+ "eval_pred_class_1": 2793,
+ "eval_predicted_binding_ratio": 0.14200732153752288,
+ "eval_recall": 0.6078684295388584,
+ "eval_recall_macro": 0.7765303395958915,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.262,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2054
+ },
+ {
+ "epoch": 80.0,
+ "eval_accuracy": 0.8923123856009763,
+ "eval_auc": 0.9140997087121456,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6410169491525424,
+ "eval_f1_macro": 0.7888360257187523,
+ "eval_loss": 0.28268861770629883,
+ "eval_pr_auc": 0.6271691682976167,
+ "eval_precision": 0.6755984280100036,
+ "eval_precision_macro": 0.8019346102940528,
+ "eval_pred_class_0": 16869,
+ "eval_pred_class_1": 2799,
+ "eval_predicted_binding_ratio": 0.1423123856009762,
+ "eval_recall": 0.6098032892615285,
+ "eval_recall_macro": 0.7774977694572265,
+ "eval_runtime": 0.2121,
+ "eval_samples_per_second": 768.576,
+ "eval_steps_per_second": 4.715,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2080
+ },
+ {
+ "epoch": 81.0,
+ "eval_accuracy": 0.8924140736221273,
+ "eval_auc": 0.9143275951752264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6415989159891599,
+ "eval_f1_macro": 0.7891531311221224,
+ "eval_loss": 0.28239867091178894,
+ "eval_pr_auc": 0.6278526459152028,
+ "eval_precision": 0.6757046022119158,
+ "eval_precision_macro": 0.8020681327098713,
+ "eval_pred_class_0": 16865,
+ "eval_pred_class_1": 2803,
+ "eval_predicted_binding_ratio": 0.14251576164327842,
+ "eval_recall": 0.6107707191228636,
+ "eval_recall_macro": 0.7779513039086281,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 617.069,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2106
+ },
+ {
+ "epoch": 82.0,
+ "eval_accuracy": 0.8925666056538539,
+ "eval_auc": 0.9145931950717662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.641317263622475,
+ "eval_f1_macro": 0.7890694555517069,
+ "eval_loss": 0.2821619510650635,
+ "eval_pr_auc": 0.6287354538303637,
+ "eval_precision": 0.6770609318996416,
+ "eval_precision_macro": 0.8026257379014738,
+ "eval_pred_class_0": 16878,
+ "eval_pred_class_1": 2790,
+ "eval_predicted_binding_ratio": 0.14185478950579622,
+ "eval_recall": 0.6091583360206385,
+ "eval_recall_macro": 0.7773865561916435,
+ "eval_runtime": 0.2651,
+ "eval_samples_per_second": 614.835,
+ "eval_steps_per_second": 3.772,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2132
+ },
+ {
+ "epoch": 83.0,
+ "eval_accuracy": 0.8929733577384584,
+ "eval_auc": 0.9148203126674294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6442453946256549,
+ "eval_f1_macro": 0.7906286370477088,
+ "eval_loss": 0.2817782461643219,
+ "eval_pr_auc": 0.6293872239214393,
+ "eval_precision": 0.6768465909090909,
+ "eval_precision_macro": 0.8029675631972466,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.6146404385682038,
+ "eval_recall_macro": 0.7798559831520322,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.502,
+ "eval_steps_per_second": 3.88,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2158
+ },
+ {
+ "epoch": 84.0,
+ "eval_accuracy": 0.8929225137278829,
+ "eval_auc": 0.9150136584917113,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6435341909275558,
+ "eval_f1_macro": 0.7902665569930348,
+ "eval_loss": 0.2815438210964203,
+ "eval_pr_auc": 0.6300492382313454,
+ "eval_precision": 0.677235482721767,
+ "eval_precision_macro": 0.8030326633702543,
+ "eval_pred_class_0": 16861,
+ "eval_pred_class_1": 2807,
+ "eval_predicted_binding_ratio": 0.14271913768558064,
+ "eval_recall": 0.6130280554659787,
+ "eval_recall_macro": 0.7791705135179836,
+ "eval_runtime": 0.2597,
+ "eval_samples_per_second": 627.685,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2184
+ },
+ {
+ "epoch": 85.0,
+ "eval_accuracy": 0.8928716697173072,
+ "eval_auc": 0.9151760160393141,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6437869822485207,
+ "eval_f1_macro": 0.7903713942390684,
+ "eval_loss": 0.28126296401023865,
+ "eval_pr_auc": 0.6304146488380505,
+ "eval_precision": 0.6766169154228856,
+ "eval_precision_macro": 0.8027975997548746,
+ "eval_pred_class_0": 16854,
+ "eval_pred_class_1": 2814,
+ "eval_predicted_binding_ratio": 0.14307504575960953,
+ "eval_recall": 0.6139954853273137,
+ "eval_recall_macro": 0.7795335065315872,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.417,
+ "eval_steps_per_second": 3.917,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2210
+ },
+ {
+ "epoch": 86.0,
+ "eval_accuracy": 0.8930750457596095,
+ "eval_auc": 0.9154795142867925,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.643861134631668,
+ "eval_f1_macro": 0.7904777241463208,
+ "eval_loss": 0.2809857428073883,
+ "eval_pr_auc": 0.6313964494387146,
+ "eval_precision": 0.677960057061341,
+ "eval_precision_macro": 0.803401280902587,
+ "eval_pred_class_0": 16864,
+ "eval_pred_class_1": 2804,
+ "eval_predicted_binding_ratio": 0.14256660565385398,
+ "eval_recall": 0.6130280554659787,
+ "eval_recall_macro": 0.7792610549557817,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.65,
+ "eval_steps_per_second": 3.924,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2236
+ },
+ {
+ "epoch": 87.0,
+ "eval_accuracy": 0.8930750457596095,
+ "eval_auc": 0.9156233995513745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6458999831621485,
+ "eval_f1_macro": 0.7914651276711422,
+ "eval_loss": 0.28072693943977356,
+ "eval_pr_auc": 0.631527672626228,
+ "eval_precision": 0.6758280479210712,
+ "eval_precision_macro": 0.8027684505796682,
+ "eval_pred_class_0": 16830,
+ "eval_pred_class_1": 2838,
+ "eval_predicted_binding_ratio": 0.14429530201342283,
+ "eval_recall": 0.618510158013544,
+ "eval_recall_macro": 0.7814890380820421,
+ "eval_runtime": 0.2629,
+ "eval_samples_per_second": 620.026,
+ "eval_steps_per_second": 3.804,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2262
+ },
+ {
+ "epoch": 88.0,
+ "eval_accuracy": 0.8932275777913362,
+ "eval_auc": 0.9158623713307676,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6449103821440649,
+ "eval_f1_macro": 0.7910387587819241,
+ "eval_loss": 0.28049618005752563,
+ "eval_pr_auc": 0.6324229662687507,
+ "eval_precision": 0.6779239246356203,
+ "eval_precision_macro": 0.8035422055690709,
+ "eval_pred_class_0": 16855,
+ "eval_pred_class_1": 2813,
+ "eval_predicted_binding_ratio": 0.14302420174903396,
+ "eval_recall": 0.6149629151886489,
+ "eval_recall_macro": 0.7801379433793187,
+ "eval_runtime": 0.2576,
+ "eval_samples_per_second": 632.715,
+ "eval_steps_per_second": 3.882,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2288
+ },
+ {
+ "epoch": 89.0,
+ "eval_accuracy": 0.893125889770185,
+ "eval_auc": 0.9160425393514616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6452919338508268,
+ "eval_f1_macro": 0.7911883195144587,
+ "eval_loss": 0.28025364875793457,
+ "eval_pr_auc": 0.6329798450144843,
+ "eval_precision": 0.6768141592920354,
+ "eval_precision_macro": 0.8031105172758937,
+ "eval_pred_class_0": 16843,
+ "eval_pred_class_1": 2825,
+ "eval_predicted_binding_ratio": 0.1436343298759406,
+ "eval_recall": 0.6165752982908739,
+ "eval_recall_macro": 0.7807328715755691,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.097,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2314
+ },
+ {
+ "epoch": 90.0,
+ "eval_accuracy": 0.8936343298759406,
+ "eval_auc": 0.9161711835226769,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6463826910074375,
+ "eval_f1_macro": 0.7918927219250234,
+ "eval_loss": 0.2800801396369934,
+ "eval_pr_auc": 0.6332605675535015,
+ "eval_precision": 0.6792184724689165,
+ "eval_precision_macro": 0.8043336176502299,
+ "eval_pred_class_0": 16853,
+ "eval_pred_class_1": 2815,
+ "eval_predicted_binding_ratio": 0.14312588977018506,
+ "eval_recall": 0.6165752982908739,
+ "eval_recall_macro": 0.7810346763682292,
+ "eval_runtime": 0.2564,
+ "eval_samples_per_second": 635.634,
+ "eval_steps_per_second": 3.9,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2340
+ },
+ {
+ "epoch": 91.0,
+ "eval_accuracy": 0.8934309538336384,
+ "eval_auc": 0.9163414730569294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6463044211947351,
+ "eval_f1_macro": 0.7917843566614202,
+ "eval_loss": 0.2798333764076233,
+ "eval_pr_auc": 0.633657166273441,
+ "eval_precision": 0.6778761061946903,
+ "eval_precision_macro": 0.8037305484960271,
+ "eval_pred_class_0": 16843,
+ "eval_pred_class_1": 2825,
+ "eval_predicted_binding_ratio": 0.1436343298759406,
+ "eval_recall": 0.617542728152209,
+ "eval_recall_macro": 0.7813071279440347,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.372,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2366
+ },
+ {
+ "epoch": 92.0,
+ "eval_accuracy": 0.8937868619076673,
+ "eval_auc": 0.9165959292421633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6469494676356261,
+ "eval_f1_macro": 0.7922200583338069,
+ "eval_loss": 0.27958908677101135,
+ "eval_pr_auc": 0.6345494466448222,
+ "eval_precision": 0.6796875,
+ "eval_precision_macro": 0.8046253782933777,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.617220251531764,
+ "eval_recall_macro": 0.7813873334679403,
+ "eval_runtime": 0.2594,
+ "eval_samples_per_second": 628.29,
+ "eval_steps_per_second": 3.855,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2392
+ },
+ {
+ "epoch": 93.0,
+ "eval_accuracy": 0.8938885499288184,
+ "eval_auc": 0.9168139566838005,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6482386650935446,
+ "eval_f1_macro": 0.7928796235385993,
+ "eval_loss": 0.27935075759887695,
+ "eval_pr_auc": 0.6350396647674293,
+ "eval_precision": 0.6790254237288136,
+ "eval_precision_macro": 0.8045281549625298,
+ "eval_pred_class_0": 16836,
+ "eval_pred_class_1": 2832,
+ "eval_predicted_binding_ratio": 0.14399023794996949,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.7826272149050808,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.307,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2418
+ },
+ {
+ "epoch": 94.0,
+ "eval_accuracy": 0.8941936139922717,
+ "eval_auc": 0.916949978089225,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6492499578628013,
+ "eval_f1_macro": 0.7934750822155368,
+ "eval_loss": 0.27906060218811035,
+ "eval_pr_auc": 0.6354743744677446,
+ "eval_precision": 0.6800847457627118,
+ "eval_precision_macro": 0.8051469107763429,
+ "eval_pred_class_0": 16836,
+ "eval_pred_class_1": 2832,
+ "eval_predicted_binding_ratio": 0.14399023794996949,
+ "eval_recall": 0.6210899709771042,
+ "eval_recall_macro": 0.7832014712735463,
+ "eval_runtime": 0.255,
+ "eval_samples_per_second": 639.231,
+ "eval_steps_per_second": 3.922,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2444
+ },
+ {
+ "epoch": 95.0,
+ "eval_accuracy": 0.894498678055725,
+ "eval_auc": 0.9171692902207247,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6496707749451291,
+ "eval_f1_macro": 0.7937845988573549,
+ "eval_loss": 0.2788851261138916,
+ "eval_pr_auc": 0.6362118552671664,
+ "eval_precision": 0.6817859673990078,
+ "eval_precision_macro": 0.8059588747122072,
+ "eval_pred_class_0": 16846,
+ "eval_pred_class_1": 2822,
+ "eval_predicted_binding_ratio": 0.14348179784421394,
+ "eval_recall": 0.6204450177362141,
+ "eval_recall_macro": 0.7831204384872295,
+ "eval_runtime": 0.2567,
+ "eval_samples_per_second": 634.948,
+ "eval_steps_per_second": 3.895,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2470
+ },
+ {
+ "epoch": 96.0,
+ "eval_accuracy": 0.8946003660768761,
+ "eval_auc": 0.9172832383185145,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.64977192093259,
+ "eval_f1_macro": 0.7938688135051675,
+ "eval_loss": 0.27873092889785767,
+ "eval_pr_auc": 0.6365172336600542,
+ "eval_precision": 0.6823988644428672,
+ "eval_precision_macro": 0.8062439426071903,
+ "eval_pred_class_0": 16850,
+ "eval_pred_class_1": 2818,
+ "eval_predicted_binding_ratio": 0.14327842180191175,
+ "eval_recall": 0.6201225411157691,
+ "eval_recall_macro": 0.783049741614805,
+ "eval_runtime": 0.2642,
+ "eval_samples_per_second": 617.058,
+ "eval_steps_per_second": 3.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2496
+ },
+ {
+ "epoch": 96.15384615384616,
+ "grad_norm": 12855.328125,
+ "learning_rate": 9.74310718484651e-07,
+ "loss": 0.268,
+ "step": 2500
+ },
+ {
+ "epoch": 97.0,
+ "eval_accuracy": 0.8948037421191783,
+ "eval_auc": 0.9174495472606937,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6506837751139625,
+ "eval_f1_macro": 0.7943808843546348,
+ "eval_loss": 0.27859047055244446,
+ "eval_pr_auc": 0.636938752781715,
+ "eval_precision": 0.6828490432317506,
+ "eval_precision_macro": 0.8065794545376371,
+ "eval_pred_class_0": 16846,
+ "eval_pred_class_1": 2822,
+ "eval_predicted_binding_ratio": 0.14348179784421394,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.783694694855695,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.744,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2522
+ },
+ {
+ "epoch": 98.0,
+ "eval_accuracy": 0.8951596501932072,
+ "eval_auc": 0.9175816853990344,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6521592442645074,
+ "eval_f1_macro": 0.7952187504847441,
+ "eval_loss": 0.2782803475856781,
+ "eval_pr_auc": 0.6372336473067074,
+ "eval_precision": 0.6837637071100107,
+ "eval_precision_macro": 0.8072045778587877,
+ "eval_pred_class_0": 16841,
+ "eval_pred_class_1": 2827,
+ "eval_predicted_binding_ratio": 0.14373601789709173,
+ "eval_recall": 0.6233473073202193,
+ "eval_recall_macro": 0.784692305196296,
+ "eval_runtime": 0.2192,
+ "eval_samples_per_second": 743.723,
+ "eval_steps_per_second": 4.563,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2548
+ },
+ {
+ "epoch": 99.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.917689375499995,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6515867656988521,
+ "eval_f1_macro": 0.7949062764205981,
+ "eval_loss": 0.2782030701637268,
+ "eval_pr_auc": 0.6376582660543189,
+ "eval_precision": 0.683669854764435,
+ "eval_precision_macro": 0.8070768389286704,
+ "eval_pred_class_0": 16845,
+ "eval_pred_class_1": 2823,
+ "eval_predicted_binding_ratio": 0.1435326418547895,
+ "eval_recall": 0.6223798774588842,
+ "eval_recall_macro": 0.7842387707448946,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.364,
+ "eval_steps_per_second": 3.935,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2574
+ },
+ {
+ "epoch": 100.0,
+ "eval_accuracy": 0.8950579621720561,
+ "eval_auc": 0.917862049496492,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6513513513513514,
+ "eval_f1_macro": 0.7947922665303561,
+ "eval_loss": 0.2779688835144043,
+ "eval_pr_auc": 0.6381115995039711,
+ "eval_precision": 0.6839304717985101,
+ "eval_precision_macro": 0.8071560484103832,
+ "eval_pred_class_0": 16849,
+ "eval_pred_class_1": 2819,
+ "eval_predicted_binding_ratio": 0.14332926581248728,
+ "eval_recall": 0.6217349242179941,
+ "eval_recall_macro": 0.7839766550829814,
+ "eval_runtime": 0.219,
+ "eval_samples_per_second": 744.395,
+ "eval_steps_per_second": 4.567,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2600
+ },
+ {
+ "epoch": 101.0,
+ "eval_accuracy": 0.8951088061826317,
+ "eval_auc": 0.9180189763096767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6513435862768294,
+ "eval_f1_macro": 0.7948061179237165,
+ "eval_loss": 0.27778077125549316,
+ "eval_pr_auc": 0.6385730633658938,
+ "eval_precision": 0.6843039772727273,
+ "eval_precision_macro": 0.8073193278245905,
+ "eval_pred_class_0": 16852,
+ "eval_pred_class_1": 2816,
+ "eval_predicted_binding_ratio": 0.14317673378076062,
+ "eval_recall": 0.6214124475975492,
+ "eval_recall_macro": 0.783875777731291,
+ "eval_runtime": 0.2529,
+ "eval_samples_per_second": 644.591,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2626
+ },
+ {
+ "epoch": 102.0,
+ "eval_accuracy": 0.895413870246085,
+ "eval_auc": 0.9182058500221522,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6518869521069555,
+ "eval_f1_macro": 0.7951749356520059,
+ "eval_loss": 0.2776651084423065,
+ "eval_pr_auc": 0.6390385071928153,
+ "eval_precision": 0.6858974358974359,
+ "eval_precision_macro": 0.8081029290993704,
+ "eval_pred_class_0": 16860,
+ "eval_pred_class_1": 2808,
+ "eval_predicted_binding_ratio": 0.1427699816961562,
+ "eval_recall": 0.6210899709771042,
+ "eval_recall_macro": 0.7839258027759306,
+ "eval_runtime": 0.2557,
+ "eval_samples_per_second": 637.358,
+ "eval_steps_per_second": 3.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2652
+ },
+ {
+ "epoch": 103.0,
+ "eval_accuracy": 0.8952104942037828,
+ "eval_auc": 0.9183596527031714,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6527379949452401,
+ "eval_f1_macro": 0.7955166277830898,
+ "eval_loss": 0.2773846685886383,
+ "eval_pr_auc": 0.6394513455183966,
+ "eval_precision": 0.6834862385321101,
+ "eval_precision_macro": 0.8071702310636076,
+ "eval_pred_class_0": 16834,
+ "eval_pred_class_1": 2834,
+ "eval_predicted_binding_ratio": 0.1440919259711206,
+ "eval_recall": 0.6246372138019993,
+ "eval_recall_macro": 0.785246716999388,
+ "eval_runtime": 0.2264,
+ "eval_samples_per_second": 719.926,
+ "eval_steps_per_second": 4.417,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2678
+ },
+ {
+ "epoch": 104.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9185293680199856,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6535353535353535,
+ "eval_f1_macro": 0.7959556034654849,
+ "eval_loss": 0.2772791385650635,
+ "eval_pr_auc": 0.6398373166129732,
+ "eval_precision": 0.6836914406481155,
+ "eval_precision_macro": 0.8073814027769664,
+ "eval_pred_class_0": 16829,
+ "eval_pred_class_1": 2839,
+ "eval_predicted_binding_ratio": 0.14434614602399837,
+ "eval_recall": 0.6259271202837794,
+ "eval_recall_macro": 0.7858614897610121,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.649,
+ "eval_steps_per_second": 3.857,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2704
+ },
+ {
+ "epoch": 105.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9186085224340037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6536519690339953,
+ "eval_f1_macro": 0.7960120658489734,
+ "eval_loss": 0.27707138657569885,
+ "eval_pr_auc": 0.6400560131071933,
+ "eval_precision": 0.6835621260119676,
+ "eval_precision_macro": 0.8073423632971826,
+ "eval_pred_class_0": 16827,
+ "eval_pred_class_1": 2841,
+ "eval_predicted_binding_ratio": 0.1444478340451495,
+ "eval_recall": 0.6262495969042244,
+ "eval_recall_macro": 0.7859925475919686,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.533,
+ "eval_steps_per_second": 3.985,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2730
+ },
+ {
+ "epoch": 106.0,
+ "eval_accuracy": 0.8953630262355095,
+ "eval_auc": 0.9187703544266627,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6535353535353535,
+ "eval_f1_macro": 0.7959556034654849,
+ "eval_loss": 0.27683117985725403,
+ "eval_pr_auc": 0.6407556071793965,
+ "eval_precision": 0.6836914406481155,
+ "eval_precision_macro": 0.8073814027769664,
+ "eval_pred_class_0": 16829,
+ "eval_pred_class_1": 2839,
+ "eval_predicted_binding_ratio": 0.14434614602399837,
+ "eval_recall": 0.6259271202837794,
+ "eval_recall_macro": 0.7858614897610121,
+ "eval_runtime": 0.3724,
+ "eval_samples_per_second": 437.667,
+ "eval_steps_per_second": 2.685,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2756
+ },
+ {
+ "epoch": 107.0,
+ "eval_accuracy": 0.8955664022778117,
+ "eval_auc": 0.918983612943811,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6543251430494783,
+ "eval_f1_macro": 0.7964085438550979,
+ "eval_loss": 0.2766495645046234,
+ "eval_pr_auc": 0.6413517959683596,
+ "eval_precision": 0.6842661034846885,
+ "eval_precision_macro": 0.8077537803332993,
+ "eval_pred_class_0": 16827,
+ "eval_pred_class_1": 2841,
+ "eval_predicted_binding_ratio": 0.1444478340451495,
+ "eval_recall": 0.6268945501451145,
+ "eval_recall_macro": 0.7863753851709456,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 631.95,
+ "eval_steps_per_second": 3.877,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2782
+ },
+ {
+ "epoch": 108.0,
+ "eval_accuracy": 0.8957189343095383,
+ "eval_auc": 0.9190842761805244,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655583543240974,
+ "eval_f1_macro": 0.7970707027489733,
+ "eval_loss": 0.27643415331840515,
+ "eval_pr_auc": 0.6415079341486267,
+ "eval_precision": 0.6839523475823406,
+ "eval_precision_macro": 0.8078082185158045,
+ "eval_pred_class_0": 16814,
+ "eval_pred_class_1": 2854,
+ "eval_predicted_binding_ratio": 0.1451088061826317,
+ "eval_recall": 0.6294743631086747,
+ "eval_recall_macro": 0.7875143892563956,
+ "eval_runtime": 0.2441,
+ "eval_samples_per_second": 667.885,
+ "eval_steps_per_second": 4.097,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2808
+ },
+ {
+ "epoch": 109.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9192001707781057,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6554565327055658,
+ "eval_f1_macro": 0.7970445082288499,
+ "eval_loss": 0.2763550579547882,
+ "eval_pr_auc": 0.6419306315808602,
+ "eval_precision": 0.6848208011243851,
+ "eval_precision_macro": 0.8081695255176081,
+ "eval_pred_class_0": 16822,
+ "eval_pred_class_1": 2846,
+ "eval_predicted_binding_ratio": 0.14470205409802725,
+ "eval_recall": 0.6285069332473395,
+ "eval_recall_macro": 0.7871815767220581,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.285,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2834
+ },
+ {
+ "epoch": 110.0,
+ "eval_accuracy": 0.8958206223306895,
+ "eval_auc": 0.9193565525713485,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.655919395465995,
+ "eval_f1_macro": 0.7972685860227431,
+ "eval_loss": 0.27626872062683105,
+ "eval_pr_auc": 0.6423732230660918,
+ "eval_precision": 0.684302733006307,
+ "eval_precision_macro": 0.8080131483516131,
+ "eval_pred_class_0": 16814,
+ "eval_pred_class_1": 2854,
+ "eval_predicted_binding_ratio": 0.1451088061826317,
+ "eval_recall": 0.6297968397291196,
+ "eval_recall_macro": 0.7877058080458841,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.337,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2860
+ },
+ {
+ "epoch": 111.0,
+ "eval_accuracy": 0.8959223103518406,
+ "eval_auc": 0.9193956188221624,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.656485987581809,
+ "eval_f1_macro": 0.7975781647169913,
+ "eval_loss": 0.2761881351470947,
+ "eval_pr_auc": 0.6423085789727141,
+ "eval_precision": 0.6843946815955213,
+ "eval_precision_macro": 0.8081402319339891,
+ "eval_pred_class_0": 16810,
+ "eval_pred_class_1": 2858,
+ "eval_predicted_binding_ratio": 0.1453121822249339,
+ "eval_recall": 0.6307642695904547,
+ "eval_recall_macro": 0.7881593424972857,
+ "eval_runtime": 0.2618,
+ "eval_samples_per_second": 622.648,
+ "eval_steps_per_second": 3.82,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2886
+ },
+ {
+ "epoch": 112.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9196281671522437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6564757265244414,
+ "eval_f1_macro": 0.7976085010119736,
+ "eval_loss": 0.2759994864463806,
+ "eval_pr_auc": 0.6431408853405497,
+ "eval_precision": 0.685133239831697,
+ "eval_precision_macro": 0.808462195558094,
+ "eval_pred_class_0": 16816,
+ "eval_pred_class_1": 2852,
+ "eval_predicted_binding_ratio": 0.14500711816148057,
+ "eval_recall": 0.6301193163495646,
+ "eval_recall_macro": 0.7879575877939047,
+ "eval_runtime": 0.2423,
+ "eval_samples_per_second": 672.612,
+ "eval_steps_per_second": 4.126,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2912
+ },
+ {
+ "epoch": 113.0,
+ "eval_accuracy": 0.8960239983729916,
+ "eval_auc": 0.9197686265771928,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6561291407432319,
+ "eval_f1_macro": 0.797440712214738,
+ "eval_loss": 0.27580633759498596,
+ "eval_pr_auc": 0.6436439478836922,
+ "eval_precision": 0.685523541813071,
+ "eval_precision_macro": 0.8085803418255701,
+ "eval_pred_class_0": 16822,
+ "eval_pred_class_1": 2846,
+ "eval_predicted_binding_ratio": 0.14470205409802725,
+ "eval_recall": 0.6291518864882296,
+ "eval_recall_macro": 0.7875644143010352,
+ "eval_runtime": 0.2575,
+ "eval_samples_per_second": 632.943,
+ "eval_steps_per_second": 3.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2938
+ },
+ {
+ "epoch": 114.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9199272079152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6573731944910984,
+ "eval_f1_macro": 0.7981312081136818,
+ "eval_loss": 0.27558717131614685,
+ "eval_pr_auc": 0.6441453864761489,
+ "eval_precision": 0.6859446196985629,
+ "eval_precision_macro": 0.8089550633431857,
+ "eval_pred_class_0": 16815,
+ "eval_pred_class_1": 2853,
+ "eval_predicted_binding_ratio": 0.14505796217205613,
+ "eval_recall": 0.6310867462108997,
+ "eval_recall_macro": 0.7885016636831041,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.272,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2964
+ },
+ {
+ "epoch": 115.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9200543727465736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6571428571428571,
+ "eval_f1_macro": 0.7980197003020941,
+ "eval_loss": 0.2754935324192047,
+ "eval_pr_auc": 0.6445859889828064,
+ "eval_precision": 0.6862056862056862,
+ "eval_precision_macro": 0.8090342302245508,
+ "eval_pred_class_0": 16819,
+ "eval_pred_class_1": 2849,
+ "eval_predicted_binding_ratio": 0.1448545861297539,
+ "eval_recall": 0.6304417929700097,
+ "eval_recall_macro": 0.7882395480211912,
+ "eval_runtime": 0.2588,
+ "eval_samples_per_second": 629.903,
+ "eval_steps_per_second": 3.864,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 2990
+ },
+ {
+ "epoch": 115.38461538461539,
+ "grad_norm": 13551.1435546875,
+ "learning_rate": 9.488660254357756e-07,
+ "loss": 0.2594,
+ "step": 3000
+ },
+ {
+ "epoch": 116.0,
+ "eval_accuracy": 0.8964815944681717,
+ "eval_auc": 0.9201159794649721,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6579301075268817,
+ "eval_f1_macro": 0.7984714041109127,
+ "eval_loss": 0.2753925323486328,
+ "eval_pr_auc": 0.6447630589609926,
+ "eval_precision": 0.6867765696246931,
+ "eval_precision_macro": 0.8094048157037065,
+ "eval_pred_class_0": 16817,
+ "eval_pred_class_1": 2851,
+ "eval_predicted_binding_ratio": 0.14495627415090503,
+ "eval_recall": 0.6314092228313447,
+ "eval_recall_macro": 0.7887534434311247,
+ "eval_runtime": 0.2556,
+ "eval_samples_per_second": 637.638,
+ "eval_steps_per_second": 3.912,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3016
+ },
+ {
+ "epoch": 117.0,
+ "eval_accuracy": 0.8964815944681717,
+ "eval_auc": 0.920259670079575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6579301075268817,
+ "eval_f1_macro": 0.7984714041109127,
+ "eval_loss": 0.27515658736228943,
+ "eval_pr_auc": 0.6451647424161069,
+ "eval_precision": 0.6867765696246931,
+ "eval_precision_macro": 0.8094048157037065,
+ "eval_pred_class_0": 16817,
+ "eval_pred_class_1": 2851,
+ "eval_predicted_binding_ratio": 0.14495627415090503,
+ "eval_recall": 0.6314092228313447,
+ "eval_recall_macro": 0.7887534434311247,
+ "eval_runtime": 0.2523,
+ "eval_samples_per_second": 645.985,
+ "eval_steps_per_second": 3.963,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3042
+ },
+ {
+ "epoch": 118.0,
+ "eval_accuracy": 0.8963799064470206,
+ "eval_auc": 0.920327525062304,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6583975863224941,
+ "eval_f1_macro": 0.7986623832121911,
+ "eval_loss": 0.2750197649002075,
+ "eval_pr_auc": 0.6454382852682906,
+ "eval_precision": 0.6855148342059337,
+ "eval_precision_macro": 0.8089241730394068,
+ "eval_pred_class_0": 16803,
+ "eval_pred_class_1": 2865,
+ "eval_predicted_binding_ratio": 0.14566809029896277,
+ "eval_recall": 0.6333440825540149,
+ "eval_recall_macro": 0.7894794294583318,
+ "eval_runtime": 0.2545,
+ "eval_samples_per_second": 640.495,
+ "eval_steps_per_second": 3.929,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3068
+ },
+ {
+ "epoch": 119.0,
+ "eval_accuracy": 0.8963799064470206,
+ "eval_auc": 0.9205032745284716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6585120643431636,
+ "eval_f1_macro": 0.7987177919414212,
+ "eval_loss": 0.27491119503974915,
+ "eval_pr_auc": 0.6460050422984182,
+ "eval_precision": 0.6853854202999651,
+ "eval_precision_macro": 0.8088851986923312,
+ "eval_pred_class_0": 16801,
+ "eval_pred_class_1": 2867,
+ "eval_predicted_binding_ratio": 0.1457697783201139,
+ "eval_recall": 0.6336665591744598,
+ "eval_recall_macro": 0.7896104872892882,
+ "eval_runtime": 0.2295,
+ "eval_samples_per_second": 710.293,
+ "eval_steps_per_second": 4.358,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3094
+ },
+ {
+ "epoch": 120.0,
+ "eval_accuracy": 0.8962782184258694,
+ "eval_auc": 0.9205394599595942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6587487453997992,
+ "eval_f1_macro": 0.7987969999557303,
+ "eval_loss": 0.2747833728790283,
+ "eval_pr_auc": 0.6459576422229408,
+ "eval_precision": 0.6843934654153633,
+ "eval_precision_macro": 0.8084881983738124,
+ "eval_pred_class_0": 16791,
+ "eval_pred_class_1": 2877,
+ "eval_predicted_binding_ratio": 0.14627821842586944,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7900743576545822,
+ "eval_runtime": 0.2633,
+ "eval_samples_per_second": 619.079,
+ "eval_steps_per_second": 3.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3120
+ },
+ {
+ "epoch": 121.0,
+ "eval_accuracy": 0.8966341264998983,
+ "eval_auc": 0.9206969317927203,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6591785414920369,
+ "eval_f1_macro": 0.7991286912009045,
+ "eval_loss": 0.2746541202068329,
+ "eval_pr_auc": 0.6466379382676535,
+ "eval_precision": 0.6864525139664804,
+ "eval_precision_macro": 0.8094545359644352,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.6339890357949048,
+ "eval_recall_macro": 0.7898924475165747,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.354,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3146
+ },
+ {
+ "epoch": 122.0,
+ "eval_accuracy": 0.8966341264998983,
+ "eval_auc": 0.9208301697034432,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6592927769398358,
+ "eval_f1_macro": 0.7991839832435101,
+ "eval_loss": 0.2745382785797119,
+ "eval_pr_auc": 0.6470920919458031,
+ "eval_precision": 0.6863224005582693,
+ "eval_precision_macro": 0.809415217658018,
+ "eval_pred_class_0": 16802,
+ "eval_pred_class_1": 2866,
+ "eval_predicted_binding_ratio": 0.14571893430953833,
+ "eval_recall": 0.6343115124153499,
+ "eval_recall_macro": 0.7900235053475313,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.372,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3172
+ },
+ {
+ "epoch": 123.0,
+ "eval_accuracy": 0.8967358145210494,
+ "eval_auc": 0.9209352222971863,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6597420003350645,
+ "eval_f1_macro": 0.7994367387715422,
+ "eval_loss": 0.2744734585285187,
+ "eval_pr_auc": 0.6474021950727136,
+ "eval_precision": 0.6865411436541143,
+ "eval_precision_macro": 0.809580095636581,
+ "eval_pred_class_0": 16800,
+ "eval_pred_class_1": 2868,
+ "eval_predicted_binding_ratio": 0.14582062233068943,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7903459819679763,
+ "eval_runtime": 0.2539,
+ "eval_samples_per_second": 641.926,
+ "eval_steps_per_second": 3.938,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3198
+ },
+ {
+ "epoch": 124.0,
+ "eval_accuracy": 0.896786658531625,
+ "eval_auc": 0.9210344159265571,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6599664991624791,
+ "eval_f1_macro": 0.7995630613656908,
+ "eval_loss": 0.2742863893508911,
+ "eval_pr_auc": 0.6479185906925482,
+ "eval_precision": 0.6866504008365284,
+ "eval_precision_macro": 0.8096624823993346,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.6352789422766849,
+ "eval_recall_macro": 0.7905072202781989,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.863,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3224
+ },
+ {
+ "epoch": 125.0,
+ "eval_accuracy": 0.896888346552776,
+ "eval_auc": 0.9211382130279347,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6603015075376885,
+ "eval_f1_macro": 0.7997605361820792,
+ "eval_loss": 0.27425193786621094,
+ "eval_pr_auc": 0.6481518613470144,
+ "eval_precision": 0.6869989543394911,
+ "eval_precision_macro": 0.8098665228272252,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.63560141889713,
+ "eval_recall_macro": 0.7906986390676873,
+ "eval_runtime": 0.2655,
+ "eval_samples_per_second": 613.835,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3250
+ },
+ {
+ "epoch": 126.0,
+ "eval_accuracy": 0.896888346552776,
+ "eval_auc": 0.9212811737051158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6598456893659845,
+ "eval_f1_macro": 0.7995399118610351,
+ "eval_loss": 0.2741680145263672,
+ "eval_pr_auc": 0.6487724280436702,
+ "eval_precision": 0.6875218455085634,
+ "eval_precision_macro": 0.8100249793973471,
+ "eval_pred_class_0": 16807,
+ "eval_pred_class_1": 2861,
+ "eval_predicted_binding_ratio": 0.14546471425666058,
+ "eval_recall": 0.6343115124153499,
+ "eval_recall_macro": 0.7901744077438613,
+ "eval_runtime": 0.2775,
+ "eval_samples_per_second": 587.468,
+ "eval_steps_per_second": 3.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3276
+ },
+ {
+ "epoch": 127.0,
+ "eval_accuracy": 0.8966849705104739,
+ "eval_auc": 0.9212962201485035,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6608811748998665,
+ "eval_f1_macro": 0.7999703379297798,
+ "eval_loss": 0.2739817500114441,
+ "eval_pr_auc": 0.648761125236648,
+ "eval_precision": 0.6848841231407817,
+ "eval_precision_macro": 0.809033228048307,
+ "eval_pred_class_0": 16777,
+ "eval_pred_class_1": 2891,
+ "eval_predicted_binding_ratio": 0.1469900345739272,
+ "eval_recall": 0.6385037084811351,
+ "eval_recall_macro": 0.7917574376292318,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 634.003,
+ "eval_steps_per_second": 3.89,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3302
+ },
+ {
+ "epoch": 128.0,
+ "eval_accuracy": 0.8971425666056538,
+ "eval_auc": 0.9213870048987755,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6608549874266555,
+ "eval_f1_macro": 0.8001167448595325,
+ "eval_loss": 0.2739529609680176,
+ "eval_pr_auc": 0.6489216955933252,
+ "eval_precision": 0.6881983240223464,
+ "eval_precision_macro": 0.8104762150937725,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.63560141889713,
+ "eval_recall_macro": 0.7908495414640173,
+ "eval_runtime": 0.26,
+ "eval_samples_per_second": 626.828,
+ "eval_steps_per_second": 3.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3328
+ },
+ {
+ "epoch": 129.0,
+ "eval_accuracy": 0.8970408785845028,
+ "eval_auc": 0.9215601460552225,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6604058359885964,
+ "eval_f1_macro": 0.7998640212813866,
+ "eval_loss": 0.27383002638816833,
+ "eval_pr_auc": 0.6495277773578755,
+ "eval_precision": 0.6879804332634522,
+ "eval_precision_macro": 0.8103117684584547,
+ "eval_pred_class_0": 16806,
+ "eval_pred_class_1": 2862,
+ "eval_predicted_binding_ratio": 0.1455155582672361,
+ "eval_recall": 0.63495646565624,
+ "eval_recall_macro": 0.7905270648435724,
+ "eval_runtime": 0.2665,
+ "eval_samples_per_second": 611.602,
+ "eval_steps_per_second": 3.752,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3354
+ },
+ {
+ "epoch": 130.0,
+ "eval_accuracy": 0.8969900345739272,
+ "eval_auc": 0.9216559138449605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6610906657745065,
+ "eval_f1_macro": 0.8001778048579948,
+ "eval_loss": 0.27363157272338867,
+ "eval_pr_auc": 0.6497825902519792,
+ "eval_precision": 0.6868265554396942,
+ "eval_precision_macro": 0.8099131883862756,
+ "eval_pred_class_0": 16791,
+ "eval_pred_class_1": 2877,
+ "eval_predicted_binding_ratio": 0.14627821842586944,
+ "eval_recall": 0.6372138019993551,
+ "eval_recall_macro": 0.7914142891810019,
+ "eval_runtime": 0.2147,
+ "eval_samples_per_second": 759.086,
+ "eval_steps_per_second": 4.657,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3380
+ },
+ {
+ "epoch": 131.0,
+ "eval_accuracy": 0.8970917225950783,
+ "eval_auc": 0.9217394576160085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6619906479625919,
+ "eval_f1_macro": 0.800648676506185,
+ "eval_loss": 0.27347350120544434,
+ "eval_pr_auc": 0.6501280761818352,
+ "eval_precision": 0.686525805334257,
+ "eval_precision_macro": 0.8099216238398834,
+ "eval_pred_class_0": 16781,
+ "eval_pred_class_1": 2887,
+ "eval_predicted_binding_ratio": 0.14678665853162498,
+ "eval_recall": 0.6391486617220251,
+ "eval_recall_macro": 0.7922609971252728,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.642,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3406
+ },
+ {
+ "epoch": 132.0,
+ "eval_accuracy": 0.8970917225950783,
+ "eval_auc": 0.9218276924515536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6609715242881072,
+ "eval_f1_macro": 0.8001554858148563,
+ "eval_loss": 0.2734222412109375,
+ "eval_pr_auc": 0.6505561229387223,
+ "eval_precision": 0.6876960613454165,
+ "eval_precision_macro": 0.8102746036830064,
+ "eval_pred_class_0": 16799,
+ "eval_pred_class_1": 2869,
+ "eval_predicted_binding_ratio": 0.145871466341265,
+ "eval_recall": 0.63624637213802,
+ "eval_recall_macro": 0.7910814766466644,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.527,
+ "eval_steps_per_second": 3.893,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3432
+ },
+ {
+ "epoch": 133.0,
+ "eval_accuracy": 0.8976001627008339,
+ "eval_auc": 0.9219583415175538,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6620805369127517,
+ "eval_f1_macro": 0.8008688878235859,
+ "eval_loss": 0.273334801197052,
+ "eval_pr_auc": 0.6510448354697362,
+ "eval_precision": 0.6901014340678558,
+ "eval_precision_macro": 0.8114972635268781,
+ "eval_pred_class_0": 16809,
+ "eval_pred_class_1": 2859,
+ "eval_predicted_binding_ratio": 0.14536302623550945,
+ "eval_recall": 0.63624637213802,
+ "eval_recall_macro": 0.7913832814393245,
+ "eval_runtime": 0.2542,
+ "eval_samples_per_second": 641.276,
+ "eval_steps_per_second": 3.934,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3458
+ },
+ {
+ "epoch": 134.0,
+ "eval_accuracy": 0.8976001627008339,
+ "eval_auc": 0.92201595791138,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6632107023411371,
+ "eval_f1_macro": 0.8014158800109571,
+ "eval_loss": 0.2731546461582184,
+ "eval_pr_auc": 0.6511084632800272,
+ "eval_precision": 0.6887808266759291,
+ "eval_precision_macro": 0.8110948031169865,
+ "eval_pred_class_0": 16789,
+ "eval_pred_class_1": 2879,
+ "eval_predicted_binding_ratio": 0.14637990644702054,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7926938597488895,
+ "eval_runtime": 0.2551,
+ "eval_samples_per_second": 638.927,
+ "eval_steps_per_second": 3.92,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3484
+ },
+ {
+ "epoch": 134.6153846153846,
+ "grad_norm": 16295.5498046875,
+ "learning_rate": 9.153428025759045e-07,
+ "loss": 0.2515,
+ "step": 3500
+ },
+ {
+ "epoch": 135.0,
+ "eval_accuracy": 0.8977526947325605,
+ "eval_auc": 0.9221653809678686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.663543583737661,
+ "eval_f1_macro": 0.8016300010477627,
+ "eval_loss": 0.2730526030063629,
+ "eval_pr_auc": 0.6517474669432921,
+ "eval_precision": 0.6894993045897079,
+ "eval_precision_macro": 0.8114599905511665,
+ "eval_pred_class_0": 16792,
+ "eval_pred_class_1": 2876,
+ "eval_predicted_binding_ratio": 0.14622737441529388,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7927844011866875,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.318,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3510
+ },
+ {
+ "epoch": 136.0,
+ "eval_accuracy": 0.8976510067114094,
+ "eval_auc": 0.9222144814251072,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6634342083263668,
+ "eval_f1_macro": 0.8015417181640828,
+ "eval_loss": 0.27299538254737854,
+ "eval_pr_auc": 0.6516584418617962,
+ "eval_precision": 0.6888888888888889,
+ "eval_precision_macro": 0.8111766341037249,
+ "eval_pred_class_0": 16788,
+ "eval_pred_class_1": 2880,
+ "eval_predicted_binding_ratio": 0.1464307504575961,
+ "eval_recall": 0.6397936149629152,
+ "eval_recall_macro": 0.792855098059112,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.888,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3536
+ },
+ {
+ "epoch": 137.0,
+ "eval_accuracy": 0.8978543827537117,
+ "eval_auc": 0.922406970789481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6637656903765691,
+ "eval_f1_macro": 0.8017728364954996,
+ "eval_loss": 0.27294018864631653,
+ "eval_pr_auc": 0.6525345183514315,
+ "eval_precision": 0.6899791231732777,
+ "eval_precision_macro": 0.8117038643138033,
+ "eval_pred_class_0": 16794,
+ "eval_pred_class_1": 2874,
+ "eval_predicted_binding_ratio": 0.14612568639414278,
+ "eval_recall": 0.6394711383424702,
+ "eval_recall_macro": 0.7928447621452195,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.531,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3562
+ },
+ {
+ "epoch": 138.0,
+ "eval_accuracy": 0.8979560707748627,
+ "eval_auc": 0.9224206449505158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6643251379829402,
+ "eval_f1_macro": 0.8020789283763069,
+ "eval_loss": 0.272890567779541,
+ "eval_pr_auc": 0.652455646685698,
+ "eval_precision": 0.6900625434329395,
+ "eval_precision_macro": 0.8118269834496443,
+ "eval_pred_class_0": 16790,
+ "eval_pred_class_1": 2878,
+ "eval_predicted_binding_ratio": 0.14632906243644497,
+ "eval_recall": 0.6404385682038052,
+ "eval_recall_macro": 0.793298296596621,
+ "eval_runtime": 0.2465,
+ "eval_samples_per_second": 661.298,
+ "eval_steps_per_second": 4.057,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3588
+ },
+ {
+ "epoch": 139.0,
+ "eval_accuracy": 0.8980069147854383,
+ "eval_auc": 0.9225580970332872,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6647727272727273,
+ "eval_f1_macro": 0.802313204605421,
+ "eval_loss": 0.2726689577102661,
+ "eval_pr_auc": 0.6528952820360587,
+ "eval_precision": 0.6899063475546305,
+ "eval_precision_macro": 0.8118283599554506,
+ "eval_pred_class_0": 16785,
+ "eval_pred_class_1": 2883,
+ "eval_predicted_binding_ratio": 0.14658328248932276,
+ "eval_recall": 0.6414059980651403,
+ "eval_recall_macro": 0.7937216505687565,
+ "eval_runtime": 0.1981,
+ "eval_samples_per_second": 822.798,
+ "eval_steps_per_second": 5.048,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3614
+ },
+ {
+ "epoch": 140.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9226611836622408,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6642078792958928,
+ "eval_f1_macro": 0.8020928521767887,
+ "eval_loss": 0.2726409435272217,
+ "eval_pr_auc": 0.6533512747607447,
+ "eval_precision": 0.6916899441340782,
+ "eval_precision_macro": 0.8125195733524473,
+ "eval_pred_class_0": 16804,
+ "eval_pred_class_1": 2864,
+ "eval_predicted_binding_ratio": 0.14561724628838724,
+ "eval_recall": 0.6388261851015802,
+ "eval_recall_macro": 0.7927637293589026,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.759,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3640
+ },
+ {
+ "epoch": 141.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.9227806014244446,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.666110183639399,
+ "eval_f1_macro": 0.8030664874893451,
+ "eval_loss": 0.2725253105163574,
+ "eval_pr_auc": 0.6539162157851042,
+ "eval_precision": 0.6905503634475597,
+ "eval_precision_macro": 0.8123173177271173,
+ "eval_pred_class_0": 16779,
+ "eval_pred_class_1": 2889,
+ "eval_predicted_binding_ratio": 0.14688834655277608,
+ "eval_recall": 0.6433408577878104,
+ "eval_recall_macro": 0.7946890804300916,
+ "eval_runtime": 0.2669,
+ "eval_samples_per_second": 610.673,
+ "eval_steps_per_second": 3.746,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3666
+ },
+ {
+ "epoch": 142.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9228251470721713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6665556850341269,
+ "eval_f1_macro": 0.8032289361592369,
+ "eval_loss": 0.27245020866394043,
+ "eval_pr_auc": 0.6537875546315605,
+ "eval_precision": 0.6889194769442533,
+ "eval_precision_macro": 0.8116772542816959,
+ "eval_pred_class_0": 16762,
+ "eval_pred_class_1": 2906,
+ "eval_predicted_binding_ratio": 0.1477526947325605,
+ "eval_recall": 0.6455981941309256,
+ "eval_recall_macro": 0.795515943808989,
+ "eval_runtime": 0.2562,
+ "eval_samples_per_second": 636.157,
+ "eval_steps_per_second": 3.903,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3692
+ },
+ {
+ "epoch": 143.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.9229446329618678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6663324979114453,
+ "eval_f1_macro": 0.8032271166958205,
+ "eval_loss": 0.2723686695098877,
+ "eval_pr_auc": 0.6543603067705216,
+ "eval_precision": 0.6914008321775312,
+ "eval_precision_macro": 0.8127225800544472,
+ "eval_pred_class_0": 16784,
+ "eval_pred_class_1": 2884,
+ "eval_predicted_binding_ratio": 0.14663412649989832,
+ "eval_recall": 0.6430183811673653,
+ "eval_recall_macro": 0.7946485640369331,
+ "eval_runtime": 0.2531,
+ "eval_samples_per_second": 644.012,
+ "eval_steps_per_second": 3.951,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3718
+ },
+ {
+ "epoch": 144.0,
+ "eval_accuracy": 0.898159446817165,
+ "eval_auc": 0.9229539761608667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6658882402001668,
+ "eval_f1_macro": 0.8029060288610683,
+ "eval_loss": 0.2724270820617676,
+ "eval_pr_auc": 0.6540924505284794,
+ "eval_precision": 0.6897028334485141,
+ "eval_precision_macro": 0.8119135366717949,
+ "eval_pred_class_0": 16774,
+ "eval_pred_class_1": 2894,
+ "eval_predicted_binding_ratio": 0.14714256660565386,
+ "eval_recall": 0.6436633344082554,
+ "eval_recall_macro": 0.7947295968232501,
+ "eval_runtime": 0.2578,
+ "eval_samples_per_second": 632.29,
+ "eval_steps_per_second": 3.879,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3744
+ },
+ {
+ "epoch": 145.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.923075982767793,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671105193075899,
+ "eval_f1_macro": 0.8035504588856721,
+ "eval_loss": 0.27218085527420044,
+ "eval_pr_auc": 0.6548951387334131,
+ "eval_precision": 0.6893704850361198,
+ "eval_precision_macro": 0.8119604647601695,
+ "eval_pred_class_0": 16761,
+ "eval_pred_class_1": 2907,
+ "eval_predicted_binding_ratio": 0.14780353874313606,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7958686009087,
+ "eval_runtime": 0.2047,
+ "eval_samples_per_second": 796.353,
+ "eval_steps_per_second": 4.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3770
+ },
+ {
+ "epoch": 146.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.9231917800403848,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671111851975329,
+ "eval_f1_macro": 0.8036038872863508,
+ "eval_loss": 0.27211907505989075,
+ "eval_pr_auc": 0.6553363499797747,
+ "eval_precision": 0.6904761904761905,
+ "eval_precision_macro": 0.8124414345344577,
+ "eval_pred_class_0": 16770,
+ "eval_pred_class_1": 2898,
+ "eval_predicted_binding_ratio": 0.14734594264795606,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7955659688536286,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 629.971,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3796
+ },
+ {
+ "epoch": 147.0,
+ "eval_accuracy": 0.8985661989017694,
+ "eval_auc": 0.9232496689441817,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6676661669165417,
+ "eval_f1_macro": 0.8039077842052783,
+ "eval_loss": 0.2721000015735626,
+ "eval_pr_auc": 0.655340056976625,
+ "eval_precision": 0.6905582356995176,
+ "eval_precision_macro": 0.812564099359958,
+ "eval_pred_class_0": 16766,
+ "eval_pred_class_1": 2902,
+ "eval_predicted_binding_ratio": 0.14754931869025828,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7960195033050301,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.767,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3822
+ },
+ {
+ "epoch": 148.0,
+ "eval_accuracy": 0.8985661989017694,
+ "eval_auc": 0.9233701767462686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6673336668334167,
+ "eval_f1_macro": 0.8037469198020228,
+ "eval_loss": 0.2719952464103699,
+ "eval_pr_auc": 0.6558934902075464,
+ "eval_precision": 0.6909530386740331,
+ "eval_precision_macro": 0.8126837695158862,
+ "eval_pred_class_0": 16772,
+ "eval_pred_class_1": 2896,
+ "eval_predicted_binding_ratio": 0.14724425462680496,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7956263298121606,
+ "eval_runtime": 0.2562,
+ "eval_samples_per_second": 636.275,
+ "eval_steps_per_second": 3.904,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3848
+ },
+ {
+ "epoch": 149.0,
+ "eval_accuracy": 0.8983628228594671,
+ "eval_auc": 0.9234171749837325,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6676641729010806,
+ "eval_f1_macro": 0.8038359878940744,
+ "eval_loss": 0.27190613746643066,
+ "eval_pr_auc": 0.6558672612955618,
+ "eval_precision": 0.6890871654083733,
+ "eval_precision_macro": 0.8119245066626444,
+ "eval_pred_class_0": 16754,
+ "eval_pred_class_1": 2914,
+ "eval_predicted_binding_ratio": 0.14815944681716495,
+ "eval_recall": 0.6475330538535956,
+ "eval_recall_macro": 0.796423012711792,
+ "eval_runtime": 0.1964,
+ "eval_samples_per_second": 830.09,
+ "eval_steps_per_second": 5.093,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3874
+ },
+ {
+ "epoch": 150.0,
+ "eval_accuracy": 0.8986678869229204,
+ "eval_auc": 0.9235316972989609,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6682204095222241,
+ "eval_f1_macro": 0.8042113179058208,
+ "eval_loss": 0.2718164622783661,
+ "eval_pr_auc": 0.6562427373314145,
+ "eval_precision": 0.6906400550584997,
+ "eval_precision_macro": 0.8126866902186664,
+ "eval_pred_class_0": 16762,
+ "eval_pred_class_1": 2906,
+ "eval_predicted_binding_ratio": 0.1477526947325605,
+ "eval_recall": 0.6472105772331506,
+ "eval_recall_macro": 0.7964730377564316,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.207,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3900
+ },
+ {
+ "epoch": 151.0,
+ "eval_accuracy": 0.8984645108806183,
+ "eval_auc": 0.923607970893288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.66744379683597,
+ "eval_f1_macro": 0.8037648014211952,
+ "eval_loss": 0.27192452549934387,
+ "eval_pr_auc": 0.6563118529429329,
+ "eval_precision": 0.6900826446280992,
+ "eval_precision_macro": 0.8123224008156005,
+ "eval_pred_class_0": 16764,
+ "eval_pred_class_1": 2904,
+ "eval_predicted_binding_ratio": 0.1476510067114094,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7959591423464981,
+ "eval_runtime": 0.1771,
+ "eval_samples_per_second": 920.181,
+ "eval_steps_per_second": 5.645,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3926
+ },
+ {
+ "epoch": 152.0,
+ "eval_accuracy": 0.8986170429123449,
+ "eval_auc": 0.9236869111923289,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6687707641196013,
+ "eval_f1_macro": 0.8044598207679289,
+ "eval_loss": 0.2716231048107147,
+ "eval_pr_auc": 0.6568319718650024,
+ "eval_precision": 0.6896197327852004,
+ "eval_precision_macro": 0.812330315374629,
+ "eval_pred_class_0": 16749,
+ "eval_pred_class_1": 2919,
+ "eval_predicted_binding_ratio": 0.1484136668700427,
+ "eval_recall": 0.6491454369558207,
+ "eval_recall_macro": 0.7972292042629046,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.369,
+ "eval_steps_per_second": 4.15,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3952
+ },
+ {
+ "epoch": 153.0,
+ "eval_accuracy": 0.8986678869229204,
+ "eval_auc": 0.9238590985638783,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6671120761650242,
+ "eval_f1_macro": 0.8036750821318089,
+ "eval_loss": 0.2716236114501953,
+ "eval_pr_auc": 0.657752547087354,
+ "eval_precision": 0.691961191961192,
+ "eval_precision_macro": 0.8130882112827054,
+ "eval_pred_class_0": 16782,
+ "eval_pred_class_1": 2886,
+ "eval_predicted_binding_ratio": 0.14673581452104942,
+ "eval_recall": 0.6439858110287005,
+ "eval_recall_macro": 0.7951624594468667,
+ "eval_runtime": 0.2469,
+ "eval_samples_per_second": 660.315,
+ "eval_steps_per_second": 4.051,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 3978
+ },
+ {
+ "epoch": 153.84615384615384,
+ "grad_norm": 13863.017578125,
+ "learning_rate": 8.743443888522679e-07,
+ "loss": 0.244,
+ "step": 4000
+ },
+ {
+ "epoch": 154.0,
+ "eval_accuracy": 0.8986170429123449,
+ "eval_auc": 0.9239073328287097,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6674449633088726,
+ "eval_f1_macro": 0.8038184624582756,
+ "eval_loss": 0.27161940932273865,
+ "eval_pr_auc": 0.6577434889769634,
+ "eval_precision": 0.6911917098445596,
+ "eval_precision_macro": 0.8128050601926549,
+ "eval_pred_class_0": 16773,
+ "eval_pred_class_1": 2895,
+ "eval_predicted_binding_ratio": 0.1471934106162294,
+ "eval_recall": 0.6452757175104805,
+ "eval_recall_macro": 0.7956565102914266,
+ "eval_runtime": 0.2311,
+ "eval_samples_per_second": 705.281,
+ "eval_steps_per_second": 4.327,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4004
+ },
+ {
+ "epoch": 155.0,
+ "eval_accuracy": 0.8983119788488916,
+ "eval_auc": 0.9240325316952941,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6666666666666666,
+ "eval_f1_macro": 0.8033357331413487,
+ "eval_loss": 0.2715211510658264,
+ "eval_pr_auc": 0.6581299979478261,
+ "eval_precision": 0.689893066574681,
+ "eval_precision_macro": 0.812118099868532,
+ "eval_pred_class_0": 16769,
+ "eval_pred_class_1": 2899,
+ "eval_predicted_binding_ratio": 0.14739678665853162,
+ "eval_recall": 0.6449532408900355,
+ "eval_recall_macro": 0.7953443695848741,
+ "eval_runtime": 0.2265,
+ "eval_samples_per_second": 719.735,
+ "eval_steps_per_second": 4.416,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4030
+ },
+ {
+ "epoch": 156.0,
+ "eval_accuracy": 0.8987187309334961,
+ "eval_auc": 0.9240861966945435,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6688829787234043,
+ "eval_f1_macro": 0.8045495325789891,
+ "eval_loss": 0.27123013138771057,
+ "eval_pr_auc": 0.6585643355556502,
+ "eval_precision": 0.6902229845626072,
+ "eval_precision_macro": 0.8126098507842583,
+ "eval_pred_class_0": 16753,
+ "eval_pred_class_1": 2915,
+ "eval_predicted_binding_ratio": 0.14821029082774048,
+ "eval_recall": 0.6488229603353757,
+ "eval_recall_macro": 0.7971585073904801,
+ "eval_runtime": 0.2445,
+ "eval_samples_per_second": 666.601,
+ "eval_steps_per_second": 4.09,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4056
+ },
+ {
+ "epoch": 157.0,
+ "eval_accuracy": 0.8987695749440716,
+ "eval_auc": 0.9242226073999265,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.668111351891982,
+ "eval_f1_macro": 0.8041939607346642,
+ "eval_loss": 0.2712385952472687,
+ "eval_pr_auc": 0.6591839305732792,
+ "eval_precision": 0.6915113871635611,
+ "eval_precision_macro": 0.8130484783164258,
+ "eval_pred_class_0": 16770,
+ "eval_pred_class_1": 2898,
+ "eval_predicted_binding_ratio": 0.14734594264795606,
+ "eval_recall": 0.6462431473718155,
+ "eval_recall_macro": 0.7961402252220942,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 634.046,
+ "eval_steps_per_second": 3.89,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4082
+ },
+ {
+ "epoch": 158.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9241500613527003,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6699883740242485,
+ "eval_f1_macro": 0.8051727852411502,
+ "eval_loss": 0.27118799090385437,
+ "eval_pr_auc": 0.6587023192472763,
+ "eval_precision": 0.6907534246575342,
+ "eval_precision_macro": 0.8130146392454138,
+ "eval_pred_class_0": 16748,
+ "eval_pred_class_1": 2920,
+ "eval_predicted_binding_ratio": 0.14846451088061827,
+ "eval_recall": 0.6504353434376008,
+ "eval_recall_macro": 0.7979646989415927,
+ "eval_runtime": 0.2165,
+ "eval_samples_per_second": 752.927,
+ "eval_steps_per_second": 4.619,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4108
+ },
+ {
+ "epoch": 159.0,
+ "eval_accuracy": 0.8990237949969494,
+ "eval_auc": 0.9243365652302152,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6693306693306693,
+ "eval_f1_macro": 0.8048723553674049,
+ "eval_loss": 0.2712218463420868,
+ "eval_pr_auc": 0.6594296109425748,
+ "eval_precision": 0.6919104991394148,
+ "eval_precision_macro": 0.8134133417966358,
+ "eval_pred_class_0": 16763,
+ "eval_pred_class_1": 2905,
+ "eval_predicted_binding_ratio": 0.14770185072198494,
+ "eval_recall": 0.6481780070944857,
+ "eval_recall_macro": 0.7970774746041632,
+ "eval_runtime": 0.256,
+ "eval_samples_per_second": 636.627,
+ "eval_steps_per_second": 3.906,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4134
+ },
+ {
+ "epoch": 160.0,
+ "eval_accuracy": 0.8990746390075249,
+ "eval_auc": 0.9243664342695147,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6709762970329852,
+ "eval_f1_macro": 0.805686028587357,
+ "eval_loss": 0.2712063789367676,
+ "eval_pr_auc": 0.6596905428752633,
+ "eval_precision": 0.6903137789904502,
+ "eval_precision_macro": 0.8129807422676916,
+ "eval_pred_class_0": 16736,
+ "eval_pred_class_1": 2932,
+ "eval_predicted_binding_ratio": 0.1490746390075249,
+ "eval_recall": 0.6526926797807159,
+ "eval_recall_macro": 0.7989424647168202,
+ "eval_runtime": 0.1787,
+ "eval_samples_per_second": 912.327,
+ "eval_steps_per_second": 5.597,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4160
+ },
+ {
+ "epoch": 161.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9244822704721024,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6700979578283247,
+ "eval_f1_macro": 0.8052257867669526,
+ "eval_loss": 0.2710643708705902,
+ "eval_pr_auc": 0.6603246471492675,
+ "eval_precision": 0.6906228610540726,
+ "eval_precision_macro": 0.8129753502690642,
+ "eval_pred_class_0": 16746,
+ "eval_pred_class_1": 2922,
+ "eval_predicted_binding_ratio": 0.14856619890176936,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7980957567725492,
+ "eval_runtime": 0.2165,
+ "eval_samples_per_second": 752.853,
+ "eval_steps_per_second": 4.619,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4186
+ },
+ {
+ "epoch": 162.0,
+ "eval_accuracy": 0.8989729509863738,
+ "eval_auc": 0.9245231275027241,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6700979578283247,
+ "eval_f1_macro": 0.8052257867669526,
+ "eval_loss": 0.2710554301738739,
+ "eval_pr_auc": 0.6603878428843051,
+ "eval_precision": 0.6906228610540726,
+ "eval_precision_macro": 0.8129753502690642,
+ "eval_pred_class_0": 16746,
+ "eval_pred_class_1": 2922,
+ "eval_predicted_binding_ratio": 0.14856619890176936,
+ "eval_recall": 0.6507578200580458,
+ "eval_recall_macro": 0.7980957567725492,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.523,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4212
+ },
+ {
+ "epoch": 163.0,
+ "eval_accuracy": 0.8993288590604027,
+ "eval_auc": 0.9246038585815736,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6719681908548708,
+ "eval_f1_macro": 0.8062543656977057,
+ "eval_loss": 0.27094030380249023,
+ "eval_pr_auc": 0.6606722300563197,
+ "eval_precision": 0.6909710391822828,
+ "eval_precision_macro": 0.8134231279100321,
+ "eval_pred_class_0": 16733,
+ "eval_pred_class_1": 2935,
+ "eval_predicted_binding_ratio": 0.14922717103925157,
+ "eval_recall": 0.653982586262496,
+ "eval_recall_macro": 0.7996175984369762,
+ "eval_runtime": 0.2218,
+ "eval_samples_per_second": 735.026,
+ "eval_steps_per_second": 4.509,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4238
+ },
+ {
+ "epoch": 164.0,
+ "eval_accuracy": 0.8992271710392515,
+ "eval_auc": 0.9247262642209572,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6713101160862355,
+ "eval_f1_macro": 0.8059006594362601,
+ "eval_loss": 0.2709755003452301,
+ "eval_pr_auc": 0.6610856256039915,
+ "eval_precision": 0.691020826220553,
+ "eval_precision_macro": 0.8133400325618567,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6526926797807159,
+ "eval_recall_macro": 0.7990330061546183,
+ "eval_runtime": 0.2493,
+ "eval_samples_per_second": 653.878,
+ "eval_steps_per_second": 4.012,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4264
+ },
+ {
+ "epoch": 165.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.9247699826062725,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6738446248136492,
+ "eval_f1_macro": 0.8073568599908361,
+ "eval_loss": 0.2707850933074951,
+ "eval_pr_auc": 0.6613417671448518,
+ "eval_precision": 0.6927792915531336,
+ "eval_precision_macro": 0.8145046350187375,
+ "eval_pred_class_0": 16732,
+ "eval_pred_class_1": 2936,
+ "eval_predicted_binding_ratio": 0.14927801504982713,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8007359306946413,
+ "eval_runtime": 0.1724,
+ "eval_samples_per_second": 945.745,
+ "eval_steps_per_second": 5.802,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4290
+ },
+ {
+ "epoch": 166.0,
+ "eval_accuracy": 0.9001423632296115,
+ "eval_auc": 0.9248060123174118,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6753719008264463,
+ "eval_f1_macro": 0.8081840577256068,
+ "eval_loss": 0.2708885669708252,
+ "eval_pr_auc": 0.6612557604235284,
+ "eval_precision": 0.6927772126144456,
+ "eval_precision_macro": 0.8147479579430862,
+ "eval_pred_class_0": 16719,
+ "eval_pred_class_1": 2949,
+ "eval_predicted_binding_ratio": 0.14993898718730933,
+ "eval_recall": 0.6588197355691713,
+ "eval_recall_macro": 0.8020663535695799,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.671,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4316
+ },
+ {
+ "epoch": 167.0,
+ "eval_accuracy": 0.8999389871873094,
+ "eval_auc": 0.9248204358808662,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6741721854304635,
+ "eval_f1_macro": 0.8075329932438238,
+ "eval_loss": 0.270906925201416,
+ "eval_pr_auc": 0.6612342465257918,
+ "eval_precision": 0.692752636951344,
+ "eval_precision_macro": 0.8145453662370445,
+ "eval_pred_class_0": 16729,
+ "eval_pred_class_1": 2939,
+ "eval_predicted_binding_ratio": 0.1494305470815538,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010282268358203,
+ "eval_runtime": 0.2263,
+ "eval_samples_per_second": 720.252,
+ "eval_steps_per_second": 4.419,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4342
+ },
+ {
+ "epoch": 168.0,
+ "eval_accuracy": 0.900091519219036,
+ "eval_auc": 0.9249249239896699,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6750454770960806,
+ "eval_f1_macro": 0.8080084845902765,
+ "eval_loss": 0.2706829011440277,
+ "eval_pr_auc": 0.6618605064537387,
+ "eval_precision": 0.6928038017651053,
+ "eval_precision_macro": 0.8147071275300828,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8017740574284009,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.89,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4368
+ },
+ {
+ "epoch": 169.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9250021027063997,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6742838218248054,
+ "eval_f1_macro": 0.8076047153692607,
+ "eval_loss": 0.2705872058868408,
+ "eval_pr_auc": 0.6621173041985378,
+ "eval_precision": 0.6929884275017019,
+ "eval_precision_macro": 0.8146651641393745,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010584073150864,
+ "eval_runtime": 0.2622,
+ "eval_samples_per_second": 621.716,
+ "eval_steps_per_second": 3.814,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4394
+ },
+ {
+ "epoch": 170.0,
+ "eval_accuracy": 0.900091519219036,
+ "eval_auc": 0.9250495097088196,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6750454770960806,
+ "eval_f1_macro": 0.8080084845902765,
+ "eval_loss": 0.2706546485424042,
+ "eval_pr_auc": 0.6620136434657915,
+ "eval_precision": 0.6928038017651053,
+ "eval_precision_macro": 0.8147071275300828,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8017740574284009,
+ "eval_runtime": 0.2696,
+ "eval_samples_per_second": 604.539,
+ "eval_steps_per_second": 3.709,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4420
+ },
+ {
+ "epoch": 171.0,
+ "eval_accuracy": 0.8999389871873094,
+ "eval_auc": 0.9250856659424456,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6748182419035029,
+ "eval_f1_macro": 0.8078453665953039,
+ "eval_loss": 0.27060601115226746,
+ "eval_pr_auc": 0.6621340116082275,
+ "eval_precision": 0.6919688241274145,
+ "eval_precision_macro": 0.814310068581025,
+ "eval_pred_class_0": 16717,
+ "eval_pred_class_1": 2951,
+ "eval_predicted_binding_ratio": 0.15004067520846046,
+ "eval_recall": 0.6584972589487262,
+ "eval_recall_macro": 0.8018145738215594,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.143,
+ "eval_steps_per_second": 3.958,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4446
+ },
+ {
+ "epoch": 172.0,
+ "eval_accuracy": 0.9000406752084604,
+ "eval_auc": 0.9252334636716082,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6741796486576069,
+ "eval_f1_macro": 0.8075720776469225,
+ "eval_loss": 0.27041611075401306,
+ "eval_pr_auc": 0.6630610344326174,
+ "eval_precision": 0.6934878963518581,
+ "eval_precision_macro": 0.8148646532849819,
+ "eval_pred_class_0": 16735,
+ "eval_pred_class_1": 2933,
+ "eval_predicted_binding_ratio": 0.14912548301810047,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8008264721324394,
+ "eval_runtime": 0.2332,
+ "eval_samples_per_second": 698.832,
+ "eval_steps_per_second": 4.287,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4472
+ },
+ {
+ "epoch": 173.0,
+ "eval_accuracy": 0.8998372991661582,
+ "eval_auc": 0.9253198298673536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6740569159497022,
+ "eval_f1_macro": 0.8074417704823604,
+ "eval_loss": 0.2705075442790985,
+ "eval_pr_auc": 0.662984991174244,
+ "eval_precision": 0.6921508664627931,
+ "eval_precision_macro": 0.8142667635751932,
+ "eval_pred_class_0": 16725,
+ "eval_pred_class_1": 2943,
+ "eval_predicted_binding_ratio": 0.149633923123856,
+ "eval_recall": 0.6568848758465011,
+ "eval_recall_macro": 0.8010989237082449,
+ "eval_runtime": 0.2134,
+ "eval_samples_per_second": 763.793,
+ "eval_steps_per_second": 4.686,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4498
+ },
+ {
+ "epoch": 173.07692307692307,
+ "grad_norm": 15784.1748046875,
+ "learning_rate": 8.266086590174684e-07,
+ "loss": 0.2376,
+ "step": 4500
+ },
+ {
+ "epoch": 174.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9254431016991443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6740679370339685,
+ "eval_f1_macro": 0.8075003208787752,
+ "eval_loss": 0.2703414559364319,
+ "eval_pr_auc": 0.6637127837233647,
+ "eval_precision": 0.6932515337423313,
+ "eval_precision_macro": 0.8147445669189726,
+ "eval_pred_class_0": 16734,
+ "eval_pred_class_1": 2934,
+ "eval_predicted_binding_ratio": 0.14917632702867603,
+ "eval_recall": 0.6559174459851661,
+ "eval_recall_macro": 0.8007962916531735,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.53,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4524
+ },
+ {
+ "epoch": 175.0,
+ "eval_accuracy": 0.9001423632296115,
+ "eval_auc": 0.9255452150782025,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6746189529489728,
+ "eval_f1_macro": 0.8078199869849969,
+ "eval_loss": 0.27038928866386414,
+ "eval_pr_auc": 0.6639609479782242,
+ "eval_precision": 0.6936967632027258,
+ "eval_precision_macro": 0.8150250385068789,
+ "eval_pred_class_0": 16733,
+ "eval_pred_class_1": 2935,
+ "eval_predicted_binding_ratio": 0.14922717103925157,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8011489487528844,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.051,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4550
+ },
+ {
+ "epoch": 176.0,
+ "eval_accuracy": 0.8999898311978849,
+ "eval_auc": 0.9255934298780361,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6742838218248054,
+ "eval_f1_macro": 0.8076047153692607,
+ "eval_loss": 0.2702932059764862,
+ "eval_pr_auc": 0.6640830183725597,
+ "eval_precision": 0.6929884275017019,
+ "eval_precision_macro": 0.8146651641393745,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6565623992260561,
+ "eval_recall_macro": 0.8010584073150864,
+ "eval_runtime": 0.1918,
+ "eval_samples_per_second": 849.843,
+ "eval_steps_per_second": 5.214,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4576
+ },
+ {
+ "epoch": 177.0,
+ "eval_accuracy": 0.8998881431767338,
+ "eval_auc": 0.9255964274877148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6745992397950752,
+ "eval_f1_macro": 0.8077217319600283,
+ "eval_loss": 0.27021023631095886,
+ "eval_pr_auc": 0.6640237853140233,
+ "eval_precision": 0.691864406779661,
+ "eval_precision_macro": 0.8142298466485935,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6581747823282812,
+ "eval_recall_macro": 0.8016533355113369,
+ "eval_runtime": 0.2437,
+ "eval_samples_per_second": 668.962,
+ "eval_steps_per_second": 4.104,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4602
+ },
+ {
+ "epoch": 178.0,
+ "eval_accuracy": 0.900549115314216,
+ "eval_auc": 0.9257395146873824,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6756218905472637,
+ "eval_f1_macro": 0.8084468667292255,
+ "eval_loss": 0.27011793851852417,
+ "eval_pr_auc": 0.6647736112265655,
+ "eval_precision": 0.695459201092523,
+ "eval_precision_macro": 0.8159475347119822,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6568848758465011,
+ "eval_recall_macro": 0.801521450417969,
+ "eval_runtime": 0.2294,
+ "eval_samples_per_second": 710.491,
+ "eval_steps_per_second": 4.359,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4628
+ },
+ {
+ "epoch": 179.0,
+ "eval_accuracy": 0.9003965832824893,
+ "eval_auc": 0.9258087711499611,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6755010766937221,
+ "eval_f1_macro": 0.8083352405901716,
+ "eval_loss": 0.2701837122440338,
+ "eval_pr_auc": 0.6649310182754444,
+ "eval_precision": 0.6944822888283378,
+ "eval_precision_macro": 0.8155055479522995,
+ "eval_pred_class_0": 16732,
+ "eval_pred_class_1": 2936,
+ "eval_predicted_binding_ratio": 0.14927801504982713,
+ "eval_recall": 0.6575298290873912,
+ "eval_recall_macro": 0.8016930246420839,
+ "eval_runtime": 0.1963,
+ "eval_samples_per_second": 830.526,
+ "eval_steps_per_second": 5.095,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4654
+ },
+ {
+ "epoch": 180.0,
+ "eval_accuracy": 0.9004474272930649,
+ "eval_auc": 0.9258260560681089,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6762566137566137,
+ "eval_f1_macro": 0.8087183092815753,
+ "eval_loss": 0.27005937695503235,
+ "eval_pr_auc": 0.6649666141685525,
+ "eval_precision": 0.6939260264675942,
+ "eval_precision_macro": 0.8153859544454471,
+ "eval_pred_class_0": 16721,
+ "eval_pred_class_1": 2947,
+ "eval_predicted_binding_ratio": 0.14983729916615823,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.802509552107089,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.51,
+ "eval_steps_per_second": 3.85,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4680
+ },
+ {
+ "epoch": 181.0,
+ "eval_accuracy": 0.9005999593247915,
+ "eval_auc": 0.9259436830505047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6764851894754261,
+ "eval_f1_macro": 0.8088820685009666,
+ "eval_loss": 0.2699625492095947,
+ "eval_pr_auc": 0.6656768815498022,
+ "eval_precision": 0.6947654656696125,
+ "eval_precision_macro": 0.8157852199805673,
+ "eval_pred_class_0": 16726,
+ "eval_pred_class_1": 2942,
+ "eval_predicted_binding_ratio": 0.14958307911328045,
+ "eval_recall": 0.6591422121896162,
+ "eval_recall_macro": 0.8024690357139305,
+ "eval_runtime": 0.1825,
+ "eval_samples_per_second": 893.316,
+ "eval_steps_per_second": 5.48,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4706
+ },
+ {
+ "epoch": 182.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9259346220939755,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6787778695293146,
+ "eval_f1_macro": 0.8101680579881181,
+ "eval_loss": 0.2698967456817627,
+ "eval_pr_auc": 0.6657214869012321,
+ "eval_precision": 0.6956668923493569,
+ "eval_precision_macro": 0.8165423129929146,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6626894550145115,
+ "eval_recall_macro": 0.8042124766471122,
+ "eval_runtime": 0.2487,
+ "eval_samples_per_second": 655.302,
+ "eval_steps_per_second": 4.02,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4732
+ },
+ {
+ "epoch": 183.0,
+ "eval_accuracy": 0.9010575554199716,
+ "eval_auc": 0.9260415822575143,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6782407407407407,
+ "eval_f1_macro": 0.8098906179070202,
+ "eval_loss": 0.26987963914871216,
+ "eval_pr_auc": 0.6659658035928079,
+ "eval_precision": 0.6959619952494062,
+ "eval_precision_macro": 0.8165833539431051,
+ "eval_pred_class_0": 16721,
+ "eval_pred_class_1": 2947,
+ "eval_predicted_binding_ratio": 0.14983729916615823,
+ "eval_recall": 0.6613995485327314,
+ "eval_recall_macro": 0.8036580648440201,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.055,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4758
+ },
+ {
+ "epoch": 184.0,
+ "eval_accuracy": 0.9010575554199716,
+ "eval_auc": 0.9260998701937684,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6781343036718491,
+ "eval_f1_macro": 0.8098391554406106,
+ "eval_loss": 0.26986023783683777,
+ "eval_pr_auc": 0.6661468987350531,
+ "eval_precision": 0.6960950764006791,
+ "eval_precision_macro": 0.8166237506024205,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6610770719122864,
+ "eval_recall_macro": 0.8035270070130636,
+ "eval_runtime": 0.237,
+ "eval_samples_per_second": 687.681,
+ "eval_steps_per_second": 4.219,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4784
+ },
+ {
+ "epoch": 185.0,
+ "eval_accuracy": 0.9012609314622737,
+ "eval_auc": 0.9262103340569317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6787958981144558,
+ "eval_f1_macro": 0.810230030763446,
+ "eval_loss": 0.26986950635910034,
+ "eval_pr_auc": 0.6665273243194801,
+ "eval_precision": 0.6967741935483871,
+ "eval_precision_macro": 0.8170231070594294,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8039098445920405,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.338,
+ "eval_steps_per_second": 4.229,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4810
+ },
+ {
+ "epoch": 186.0,
+ "eval_accuracy": 0.9014134634940004,
+ "eval_auc": 0.9262454683781669,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6797687861271676,
+ "eval_f1_macro": 0.8107536578092345,
+ "eval_loss": 0.2697572410106659,
+ "eval_pr_auc": 0.666716804071224,
+ "eval_precision": 0.6966824644549763,
+ "eval_precision_macro": 0.8171398441695726,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6636568848758465,
+ "eval_recall_macro": 0.8047867330155776,
+ "eval_runtime": 0.1794,
+ "eval_samples_per_second": 908.74,
+ "eval_steps_per_second": 5.575,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4836
+ },
+ {
+ "epoch": 187.0,
+ "eval_accuracy": 0.9011083994305471,
+ "eval_auc": 0.9263210315000697,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6784592494627211,
+ "eval_f1_macro": 0.810014015033881,
+ "eval_loss": 0.26983824372291565,
+ "eval_pr_auc": 0.6669884807739552,
+ "eval_precision": 0.6960651289009498,
+ "eval_precision_macro": 0.8166629472255945,
+ "eval_pred_class_0": 16720,
+ "eval_pred_class_1": 2948,
+ "eval_predicted_binding_ratio": 0.14988814317673377,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8038193031542425,
+ "eval_runtime": 0.1795,
+ "eval_samples_per_second": 908.044,
+ "eval_steps_per_second": 5.571,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4862
+ },
+ {
+ "epoch": 188.0,
+ "eval_accuracy": 0.9012100874516982,
+ "eval_auc": 0.9264191350895575,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6786836447825368,
+ "eval_f1_macro": 0.8101580079180191,
+ "eval_loss": 0.26967287063598633,
+ "eval_pr_auc": 0.667515048707415,
+ "eval_precision": 0.6965376782077393,
+ "eval_precision_macro": 0.8169029737767557,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6617220251531764,
+ "eval_recall_macro": 0.8038796641127746,
+ "eval_runtime": 0.1851,
+ "eval_samples_per_second": 880.782,
+ "eval_steps_per_second": 5.404,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4888
+ },
+ {
+ "epoch": 189.0,
+ "eval_accuracy": 0.9013626194834249,
+ "eval_auc": 0.9265750497228504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6782752902155887,
+ "eval_f1_macro": 0.8100137635248964,
+ "eval_loss": 0.2697126567363739,
+ "eval_pr_auc": 0.6680073523436961,
+ "eval_precision": 0.698190508706043,
+ "eval_precision_macro": 0.8175521514197519,
+ "eval_pred_class_0": 16739,
+ "eval_pred_class_1": 2929,
+ "eval_predicted_binding_ratio": 0.14892210697579825,
+ "eval_recall": 0.6594646888100613,
+ "eval_recall_macro": 0.8030528007338771,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.766,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4914
+ },
+ {
+ "epoch": 190.0,
+ "eval_accuracy": 0.9017185275574537,
+ "eval_auc": 0.9265310393625665,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6808651147432723,
+ "eval_f1_macro": 0.8113902183590456,
+ "eval_loss": 0.26952171325683594,
+ "eval_pr_auc": 0.6676611850618266,
+ "eval_precision": 0.6975642760487145,
+ "eval_precision_macro": 0.8176966904417818,
+ "eval_pred_class_0": 16712,
+ "eval_pred_class_1": 2956,
+ "eval_predicted_binding_ratio": 0.1502948952613382,
+ "eval_recall": 0.6649467913576266,
+ "eval_recall_macro": 0.8054920472149997,
+ "eval_runtime": 0.2625,
+ "eval_samples_per_second": 620.883,
+ "eval_steps_per_second": 3.809,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4940
+ },
+ {
+ "epoch": 191.0,
+ "eval_accuracy": 0.901921903599756,
+ "eval_auc": 0.9266825451738318,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6812097174020822,
+ "eval_f1_macro": 0.8116278420268636,
+ "eval_loss": 0.2694892883300781,
+ "eval_pr_auc": 0.668268169384226,
+ "eval_precision": 0.6986440677966101,
+ "eval_precision_macro": 0.8182178348314311,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6646243147371815,
+ "eval_recall_macro": 0.8054817113011072,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.763,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4966
+ },
+ {
+ "epoch": 192.0,
+ "eval_accuracy": 0.9018710595891803,
+ "eval_auc": 0.9266840147811743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6814130075932651,
+ "eval_f1_macro": 0.8117083668893665,
+ "eval_loss": 0.26940062642097473,
+ "eval_pr_auc": 0.6682450098479531,
+ "eval_precision": 0.6980047345282381,
+ "eval_precision_macro": 0.817974900326174,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6655917445985166,
+ "eval_recall_macro": 0.8058447043147107,
+ "eval_runtime": 0.2532,
+ "eval_samples_per_second": 643.715,
+ "eval_steps_per_second": 3.949,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 4992
+ },
+ {
+ "epoch": 192.30769230769232,
+ "grad_norm": 15858.0107421875,
+ "learning_rate": 7.72994743624204e-07,
+ "loss": 0.2316,
+ "step": 5000
+ },
+ {
+ "epoch": 193.0,
+ "eval_accuracy": 0.9021761236526337,
+ "eval_auc": 0.9268377687996988,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6816677696889477,
+ "eval_f1_macro": 0.8119380540142443,
+ "eval_loss": 0.26932862401008606,
+ "eval_pr_auc": 0.6692251134414691,
+ "eval_precision": 0.6999660210669385,
+ "eval_precision_macro": 0.8188619343002854,
+ "eval_pred_class_0": 16725,
+ "eval_pred_class_1": 2943,
+ "eval_predicted_binding_ratio": 0.149633923123856,
+ "eval_recall": 0.6643018381167365,
+ "eval_recall_macro": 0.8055015558664808,
+ "eval_runtime": 0.1767,
+ "eval_samples_per_second": 922.594,
+ "eval_steps_per_second": 5.66,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5018
+ },
+ {
+ "epoch": 194.0,
+ "eval_accuracy": 0.9022269676632093,
+ "eval_auc": 0.92693509378927,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6815697963238947,
+ "eval_f1_macro": 0.8119084228038069,
+ "eval_loss": 0.2693846523761749,
+ "eval_pr_auc": 0.6695232673057094,
+ "eval_precision": 0.7004765146358066,
+ "eval_precision_macro": 0.8190667092007484,
+ "eval_pred_class_0": 16730,
+ "eval_pred_class_1": 2938,
+ "eval_predicted_binding_ratio": 0.14937970307097823,
+ "eval_recall": 0.6636568848758465,
+ "eval_recall_macro": 0.8052696206838338,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.258,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5044
+ },
+ {
+ "epoch": 195.0,
+ "eval_accuracy": 0.9022778116737848,
+ "eval_auc": 0.926939395553809,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6825239511067063,
+ "eval_f1_macro": 0.8123875088746679,
+ "eval_loss": 0.269380122423172,
+ "eval_pr_auc": 0.6693235837806535,
+ "eval_precision": 0.6996274974602099,
+ "eval_precision_macro": 0.8188535333546936,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6662366978394066,
+ "eval_recall_macro": 0.8063482638107518,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.818,
+ "eval_steps_per_second": 3.944,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5070
+ },
+ {
+ "epoch": 196.0,
+ "eval_accuracy": 0.9018202155786048,
+ "eval_auc": 0.9269277165550606,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6819304892110031,
+ "eval_f1_macro": 0.8119407443800393,
+ "eval_loss": 0.2693455219268799,
+ "eval_pr_auc": 0.6690854783865479,
+ "eval_precision": 0.696969696969697,
+ "eval_precision_macro": 0.8176128877709905,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6675266043211867,
+ "eval_recall_macro": 0.8066008708211837,
+ "eval_runtime": 0.2679,
+ "eval_samples_per_second": 608.505,
+ "eval_steps_per_second": 3.733,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5096
+ },
+ {
+ "epoch": 197.0,
+ "eval_accuracy": 0.9018202155786048,
+ "eval_auc": 0.926915803976337,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6826622843056697,
+ "eval_f1_macro": 0.8122944214527055,
+ "eval_loss": 0.2691311538219452,
+ "eval_pr_auc": 0.6692617138980786,
+ "eval_precision": 0.6960455764075067,
+ "eval_precision_macro": 0.8173347038115213,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6697839406643018,
+ "eval_recall_macro": 0.8075182756378791,
+ "eval_runtime": 0.261,
+ "eval_samples_per_second": 624.485,
+ "eval_steps_per_second": 3.831,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5122
+ },
+ {
+ "epoch": 198.0,
+ "eval_accuracy": 0.9023794996949359,
+ "eval_auc": 0.9270834170733764,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6822244289970208,
+ "eval_f1_macro": 0.8122781903500151,
+ "eval_loss": 0.26926350593566895,
+ "eval_pr_auc": 0.6700425139918407,
+ "eval_precision": 0.7007820469228153,
+ "eval_precision_macro": 0.8193035600788525,
+ "eval_pred_class_0": 16727,
+ "eval_pred_class_1": 2941,
+ "eval_predicted_binding_ratio": 0.1495322351027049,
+ "eval_recall": 0.6646243147371815,
+ "eval_recall_macro": 0.8057533356145012,
+ "eval_runtime": 0.1796,
+ "eval_samples_per_second": 907.821,
+ "eval_steps_per_second": 5.569,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5148
+ },
+ {
+ "epoch": 199.0,
+ "eval_accuracy": 0.9023286556843604,
+ "eval_auc": 0.9271362061477199,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6822167080231596,
+ "eval_f1_macro": 0.812256712426767,
+ "eval_loss": 0.26931333541870117,
+ "eval_pr_auc": 0.6701662107301889,
+ "eval_precision": 0.7004076086956522,
+ "eval_precision_macro": 0.8191406615590195,
+ "eval_pred_class_0": 16724,
+ "eval_pred_class_1": 2944,
+ "eval_predicted_binding_ratio": 0.14968476713443157,
+ "eval_recall": 0.6649467913576266,
+ "eval_recall_macro": 0.8058542129661919,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.682,
+ "eval_steps_per_second": 3.931,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5174
+ },
+ {
+ "epoch": 200.0,
+ "eval_accuracy": 0.9019727476103315,
+ "eval_auc": 0.9271006630615285,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6823723228995058,
+ "eval_f1_macro": 0.8122076248057319,
+ "eval_loss": 0.26923447847366333,
+ "eval_pr_auc": 0.6700054234044599,
+ "eval_precision": 0.6975412596833951,
+ "eval_precision_macro": 0.8179304597716335,
+ "eval_pred_class_0": 16699,
+ "eval_pred_class_1": 2969,
+ "eval_predicted_binding_ratio": 0.15095586739882041,
+ "eval_recall": 0.6678490809416318,
+ "eval_recall_macro": 0.8068224700899382,
+ "eval_runtime": 0.2001,
+ "eval_samples_per_second": 814.446,
+ "eval_steps_per_second": 4.997,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5200
+ },
+ {
+ "epoch": 201.0,
+ "eval_accuracy": 0.9022269676632093,
+ "eval_auc": 0.9272154481542286,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6827256228345158,
+ "eval_f1_macro": 0.8124672441745833,
+ "eval_loss": 0.2691180408000946,
+ "eval_pr_auc": 0.6705128874375396,
+ "eval_precision": 0.6989864864864865,
+ "eval_precision_macro": 0.8186098340979236,
+ "eval_pred_class_0": 16708,
+ "eval_pred_class_1": 2960,
+ "eval_predicted_binding_ratio": 0.15049827130364044,
+ "eval_recall": 0.6672041277007417,
+ "eval_recall_macro": 0.8067112568243553,
+ "eval_runtime": 0.2326,
+ "eval_samples_per_second": 700.909,
+ "eval_steps_per_second": 4.3,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5226
+ },
+ {
+ "epoch": 202.0,
+ "eval_accuracy": 0.9027354077689648,
+ "eval_auc": 0.9272916730860608,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6847915636842973,
+ "eval_f1_macro": 0.8136435649304945,
+ "eval_loss": 0.2689346969127655,
+ "eval_pr_auc": 0.6709998700001464,
+ "eval_precision": 0.7001347708894878,
+ "eval_precision_macro": 0.8194386429297739,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6701064172847468,
+ "eval_recall_macro": 0.8081925820956238,
+ "eval_runtime": 0.1664,
+ "eval_samples_per_second": 979.381,
+ "eval_steps_per_second": 6.008,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5252
+ },
+ {
+ "epoch": 203.0,
+ "eval_accuracy": 0.9025828757372382,
+ "eval_auc": 0.9274415730349983,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6833057851239669,
+ "eval_f1_macro": 0.8128720237282395,
+ "eval_loss": 0.269077867269516,
+ "eval_pr_auc": 0.6716085952371595,
+ "eval_precision": 0.7009155645981688,
+ "eval_precision_macro": 0.819534880211639,
+ "eval_pred_class_0": 16719,
+ "eval_pred_class_1": 2949,
+ "eval_predicted_binding_ratio": 0.14993898718730933,
+ "eval_recall": 0.6665591744598517,
+ "eval_recall_macro": 0.8066604045173044,
+ "eval_runtime": 0.2556,
+ "eval_samples_per_second": 637.756,
+ "eval_steps_per_second": 3.913,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5278
+ },
+ {
+ "epoch": 204.0,
+ "eval_accuracy": 0.902837095790116,
+ "eval_auc": 0.9274026625041677,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6848095002474023,
+ "eval_f1_macro": 0.8136877723940104,
+ "eval_loss": 0.2691201865673065,
+ "eval_pr_auc": 0.6711363619469519,
+ "eval_precision": 0.700877785280216,
+ "eval_precision_macro": 0.8197612917781423,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6694614640438569,
+ "eval_recall_macro": 0.8079908273922429,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.544,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5304
+ },
+ {
+ "epoch": 205.0,
+ "eval_accuracy": 0.9029896278218426,
+ "eval_auc": 0.9274924350745481,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6850445691647409,
+ "eval_f1_macro": 0.8138546963859644,
+ "eval_loss": 0.2691231071949005,
+ "eval_pr_auc": 0.6713794419677425,
+ "eval_precision": 0.7017247210010146,
+ "eval_precision_macro": 0.8201640180913157,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.8079503109990844,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.331,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5330
+ },
+ {
+ "epoch": 206.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9275284258556915,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6860062633921213,
+ "eval_f1_macro": 0.8143728753012186,
+ "eval_loss": 0.26890990138053894,
+ "eval_pr_auc": 0.6717808840440014,
+ "eval_precision": 0.7016183412002697,
+ "eval_precision_macro": 0.8202739053624388,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6710738471460819,
+ "eval_recall_macro": 0.8088271994226215,
+ "eval_runtime": 0.2637,
+ "eval_samples_per_second": 618.148,
+ "eval_steps_per_second": 3.792,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5356
+ },
+ {
+ "epoch": 207.0,
+ "eval_accuracy": 0.9030913158429937,
+ "eval_auc": 0.9275997261430513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6854785478547855,
+ "eval_f1_macro": 0.8141000144010073,
+ "eval_loss": 0.2689792513847351,
+ "eval_pr_auc": 0.6718662790463032,
+ "eval_precision": 0.7019263264616424,
+ "eval_precision_macro": 0.8203209943398044,
+ "eval_pred_class_0": 16709,
+ "eval_pred_class_1": 2959,
+ "eval_predicted_binding_ratio": 0.15044742729306487,
+ "eval_recall": 0.6697839406643018,
+ "eval_recall_macro": 0.8082727876195295,
+ "eval_runtime": 0.195,
+ "eval_samples_per_second": 835.813,
+ "eval_steps_per_second": 5.128,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5382
+ },
+ {
+ "epoch": 208.0,
+ "eval_accuracy": 0.9032946918852959,
+ "eval_auc": 0.9276906860783045,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862421643022105,
+ "eval_f1_macro": 0.8145402081954642,
+ "eval_loss": 0.2688303291797638,
+ "eval_pr_auc": 0.6723694322774509,
+ "eval_precision": 0.7024653833164471,
+ "eval_precision_macro": 0.8206766373097469,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808786683029463,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.22,
+ "eval_steps_per_second": 3.848,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5408
+ },
+ {
+ "epoch": 209.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9277029684919884,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6851760039662865,
+ "eval_f1_macro": 0.8139715080669648,
+ "eval_loss": 0.2689387798309326,
+ "eval_pr_auc": 0.6722283267888528,
+ "eval_precision": 0.7027118644067797,
+ "eval_precision_macro": 0.8206106277411336,
+ "eval_pred_class_0": 16718,
+ "eval_pred_class_1": 2950,
+ "eval_predicted_binding_ratio": 0.1499898311978849,
+ "eval_recall": 0.6684940341825217,
+ "eval_recall_macro": 0.8077787367749694,
+ "eval_runtime": 0.2435,
+ "eval_samples_per_second": 669.46,
+ "eval_steps_per_second": 4.107,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5434
+ },
+ {
+ "epoch": 210.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9277600593308708,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6846370100876468,
+ "eval_f1_macro": 0.8136754097270521,
+ "eval_loss": 0.26892563700675964,
+ "eval_pr_auc": 0.6725269853749476,
+ "eval_precision": 0.7026476578411406,
+ "eval_precision_macro": 0.8204961767258567,
+ "eval_pred_class_0": 16722,
+ "eval_pred_class_1": 2946,
+ "eval_predicted_binding_ratio": 0.14978645515558267,
+ "eval_recall": 0.6675266043211867,
+ "eval_recall_macro": 0.807325202323568,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.763,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5460
+ },
+ {
+ "epoch": 211.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9278453744167288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.686128979053274,
+ "eval_f1_macro": 0.8144677293907912,
+ "eval_loss": 0.26878559589385986,
+ "eval_pr_auc": 0.6729846306066621,
+ "eval_precision": 0.7022282241728561,
+ "eval_precision_macro": 0.8205562286912407,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808756502550197,
+ "eval_runtime": 0.2433,
+ "eval_samples_per_second": 669.879,
+ "eval_steps_per_second": 4.11,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5486
+ },
+ {
+ "epoch": 211.53846153846155,
+ "grad_norm": 16655.041015625,
+ "learning_rate": 7.144675667015729e-07,
+ "loss": 0.2259,
+ "step": 5500
+ },
+ {
+ "epoch": 212.0,
+ "eval_accuracy": 0.9028879398006915,
+ "eval_auc": 0.9279623006591996,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6845061116617113,
+ "eval_f1_macro": 0.8135588667797169,
+ "eval_loss": 0.26880088448524475,
+ "eval_pr_auc": 0.6734939312101108,
+ "eval_precision": 0.7016593294954284,
+ "eval_precision_macro": 0.8200489288817256,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6681715575620768,
+ "eval_recall_macro": 0.8074967765476829,
+ "eval_runtime": 0.2166,
+ "eval_samples_per_second": 752.433,
+ "eval_steps_per_second": 4.616,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5512
+ },
+ {
+ "epoch": 213.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9279055601902797,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6863359156090325,
+ "eval_f1_macro": 0.8145677594216373,
+ "eval_loss": 0.2687283456325531,
+ "eval_pr_auc": 0.6734074239428265,
+ "eval_precision": 0.7019554956169926,
+ "eval_precision_macro": 0.820472419105347,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6713963237665269,
+ "eval_recall_macro": 0.80901861821211,
+ "eval_runtime": 0.2336,
+ "eval_samples_per_second": 697.742,
+ "eval_steps_per_second": 4.281,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5538
+ },
+ {
+ "epoch": 214.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9279595755594916,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6864392815949909,
+ "eval_f1_macro": 0.8146177229810407,
+ "eval_loss": 0.2687055766582489,
+ "eval_pr_auc": 0.6734235549479375,
+ "eval_precision": 0.7018194070080862,
+ "eval_precision_macro": 0.8204306615878754,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8091496760430665,
+ "eval_runtime": 0.225,
+ "eval_samples_per_second": 724.337,
+ "eval_steps_per_second": 4.444,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5564
+ },
+ {
+ "epoch": 215.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9280330753916157,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.685383980181668,
+ "eval_f1_macro": 0.8140720567955604,
+ "eval_loss": 0.2687467932701111,
+ "eval_pr_auc": 0.6737104569152422,
+ "eval_precision": 0.7024373730534867,
+ "eval_precision_macro": 0.8205258541706347,
+ "eval_pred_class_0": 16714,
+ "eval_pred_class_1": 2954,
+ "eval_predicted_binding_ratio": 0.15019320724018712,
+ "eval_recall": 0.6691389874234118,
+ "eval_recall_macro": 0.8080408524368825,
+ "eval_runtime": 0.1833,
+ "eval_samples_per_second": 889.114,
+ "eval_steps_per_second": 5.455,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5590
+ },
+ {
+ "epoch": 216.0,
+ "eval_accuracy": 0.9031930038641448,
+ "eval_auc": 0.9281060496687963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6860158311345647,
+ "eval_f1_macro": 0.8143952682963037,
+ "eval_loss": 0.26888203620910645,
+ "eval_pr_auc": 0.6738155361634312,
+ "eval_precision": 0.7019912251096861,
+ "eval_precision_macro": 0.8204358998939631,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808726322070931,
+ "eval_runtime": 0.1764,
+ "eval_samples_per_second": 923.938,
+ "eval_steps_per_second": 5.668,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5616
+ },
+ {
+ "epoch": 217.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9281046287239484,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6865425794761983,
+ "eval_f1_macro": 0.8146676522813128,
+ "eval_loss": 0.2686736285686493,
+ "eval_pr_auc": 0.6740322097472393,
+ "eval_precision": 0.7016835016835017,
+ "eval_precision_macro": 0.8203890020095554,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.672041277007417,
+ "eval_recall_macro": 0.809280733874023,
+ "eval_runtime": 0.2537,
+ "eval_samples_per_second": 642.37,
+ "eval_steps_per_second": 3.941,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5642
+ },
+ {
+ "epoch": 218.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9282425576991689,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862615587846763,
+ "eval_f1_macro": 0.814585106315415,
+ "eval_loss": 0.26872488856315613,
+ "eval_pr_auc": 0.6745604450622946,
+ "eval_precision": 0.7032148900169205,
+ "eval_precision_macro": 0.8210025266814094,
+ "eval_pred_class_0": 16713,
+ "eval_pred_class_1": 2955,
+ "eval_predicted_binding_ratio": 0.15024405125076265,
+ "eval_recall": 0.6701064172847468,
+ "eval_recall_macro": 0.808584928326082,
+ "eval_runtime": 0.2515,
+ "eval_samples_per_second": 648.077,
+ "eval_steps_per_second": 3.976,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5668
+ },
+ {
+ "epoch": 219.0,
+ "eval_accuracy": 0.9032946918852959,
+ "eval_auc": 0.9283728174652107,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6862421643022105,
+ "eval_f1_macro": 0.8145402081954642,
+ "eval_loss": 0.26861947774887085,
+ "eval_pr_auc": 0.675175157595335,
+ "eval_precision": 0.7024653833164471,
+ "eval_precision_macro": 0.8206766373097469,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.808786683029463,
+ "eval_runtime": 0.269,
+ "eval_samples_per_second": 605.917,
+ "eval_steps_per_second": 3.717,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5694
+ },
+ {
+ "epoch": 220.0,
+ "eval_accuracy": 0.9035997559487492,
+ "eval_auc": 0.9283705984554486,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6875411997363217,
+ "eval_f1_macro": 0.8152747479984963,
+ "eval_loss": 0.2684967517852783,
+ "eval_pr_auc": 0.6752603675091132,
+ "eval_precision": 0.703067071115605,
+ "eval_precision_macro": 0.8211461336058236,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.809754112890798,
+ "eval_runtime": 0.2663,
+ "eval_samples_per_second": 612.018,
+ "eval_steps_per_second": 3.755,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5720
+ },
+ {
+ "epoch": 221.0,
+ "eval_accuracy": 0.9037014439699004,
+ "eval_auc": 0.9285080992007146,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6871489924017179,
+ "eval_f1_macro": 0.8151206773197821,
+ "eval_loss": 0.2685548961162567,
+ "eval_pr_auc": 0.6758164431668767,
+ "eval_precision": 0.7043684388757196,
+ "eval_precision_macro": 0.8216427895844347,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.6707513705256369,
+ "eval_recall_macro": 0.809028126863591,
+ "eval_runtime": 0.1986,
+ "eval_samples_per_second": 820.602,
+ "eval_steps_per_second": 5.034,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5746
+ },
+ {
+ "epoch": 222.0,
+ "eval_accuracy": 0.9034980679275981,
+ "eval_auc": 0.9285004105265384,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6870052770448549,
+ "eval_f1_macro": 0.8149801571567146,
+ "eval_loss": 0.2685752809047699,
+ "eval_pr_auc": 0.6755687553750968,
+ "eval_precision": 0.7030037124535943,
+ "eval_precision_macro": 0.8210319370409247,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8093005784393965,
+ "eval_runtime": 0.2128,
+ "eval_samples_per_second": 765.976,
+ "eval_steps_per_second": 4.699,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5772
+ },
+ {
+ "epoch": 223.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9285113303903685,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6871913072110636,
+ "eval_f1_macro": 0.815034532807023,
+ "eval_loss": 0.2685534358024597,
+ "eval_pr_auc": 0.675465436437485,
+ "eval_precision": 0.7019845274133871,
+ "eval_precision_macro": 0.8206238899420935,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8097644488046905,
+ "eval_runtime": 0.2565,
+ "eval_samples_per_second": 635.434,
+ "eval_steps_per_second": 3.898,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5798
+ },
+ {
+ "epoch": 224.0,
+ "eval_accuracy": 0.9033963799064471,
+ "eval_auc": 0.9286790602773953,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.685742639761826,
+ "eval_f1_macro": 0.814334221653217,
+ "eval_loss": 0.2686038315296173,
+ "eval_pr_auc": 0.6763975611431872,
+ "eval_precision": 0.7039049235993209,
+ "eval_precision_macro": 0.8212163498580232,
+ "eval_pred_class_0": 16723,
+ "eval_pred_class_1": 2945,
+ "eval_predicted_binding_ratio": 0.1497356111450071,
+ "eval_recall": 0.6684940341825217,
+ "eval_recall_macro": 0.8079296391712996,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.051,
+ "eval_steps_per_second": 3.865,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5824
+ },
+ {
+ "epoch": 225.0,
+ "eval_accuracy": 0.9030404718324181,
+ "eval_auc": 0.9286844910118134,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6857801944307135,
+ "eval_f1_macro": 0.8142280597608222,
+ "eval_loss": 0.268373966217041,
+ "eval_pr_auc": 0.6766849702960268,
+ "eval_precision": 0.7011455525606469,
+ "eval_precision_macro": 0.8200338541246348,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6710738471460819,
+ "eval_recall_macro": 0.8087668384640894,
+ "eval_runtime": 0.2618,
+ "eval_samples_per_second": 622.527,
+ "eval_steps_per_second": 3.819,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5850
+ },
+ {
+ "epoch": 226.0,
+ "eval_accuracy": 0.9031421598535693,
+ "eval_auc": 0.9286265437130228,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6868321551865856,
+ "eval_f1_macro": 0.8147720454758898,
+ "eval_loss": 0.268480122089386,
+ "eval_pr_auc": 0.6762864798501788,
+ "eval_precision": 0.7005365526492288,
+ "eval_precision_macro": 0.8199434531195322,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8098756620702734,
+ "eval_runtime": 0.1801,
+ "eval_samples_per_second": 905.249,
+ "eval_steps_per_second": 5.554,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5876
+ },
+ {
+ "epoch": 227.0,
+ "eval_accuracy": 0.9034472239170226,
+ "eval_auc": 0.9286911091111043,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6878185106033208,
+ "eval_f1_macro": 0.8153554406082493,
+ "eval_loss": 0.2684793770313263,
+ "eval_pr_auc": 0.6763719243742072,
+ "eval_precision": 0.7015425888665325,
+ "eval_precision_macro": 0.8205363669491479,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.810449918438739,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.327,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5902
+ },
+ {
+ "epoch": 228.0,
+ "eval_accuracy": 0.9032438478747203,
+ "eval_auc": 0.9287447935753516,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6870580496628844,
+ "eval_f1_macro": 0.8149167860703537,
+ "eval_loss": 0.2684626877307892,
+ "eval_pr_auc": 0.6765555444970285,
+ "eval_precision": 0.701006711409396,
+ "eval_precision_macro": 0.8201821668264622,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8099360230288054,
+ "eval_runtime": 0.253,
+ "eval_samples_per_second": 644.184,
+ "eval_steps_per_second": 3.952,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5928
+ },
+ {
+ "epoch": 229.0,
+ "eval_accuracy": 0.9037522879804759,
+ "eval_auc": 0.92885044958403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6877783275606135,
+ "eval_f1_macro": 0.815442675636767,
+ "eval_loss": 0.2684047222137451,
+ "eval_pr_auc": 0.6771563538797724,
+ "eval_precision": 0.7039162727886563,
+ "eval_precision_macro": 0.8215498998326138,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.6723637536278619,
+ "eval_recall_macro": 0.8097135964976395,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.352,
+ "eval_steps_per_second": 3.886,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5954
+ },
+ {
+ "epoch": 230.0,
+ "eval_accuracy": 0.9031930038641448,
+ "eval_auc": 0.9288052907888691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6866359447004609,
+ "eval_f1_macro": 0.814695001815053,
+ "eval_loss": 0.2683703601360321,
+ "eval_pr_auc": 0.6770692308922137,
+ "eval_precision": 0.7011764705882353,
+ "eval_precision_macro": 0.8201862703986524,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.80951266905667,
+ "eval_runtime": 0.1803,
+ "eval_samples_per_second": 903.896,
+ "eval_steps_per_second": 5.545,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 5980
+ },
+ {
+ "epoch": 230.76923076923077,
+ "grad_norm": 18226.349609375,
+ "learning_rate": 6.520804793983146e-07,
+ "loss": 0.2213,
+ "step": 6000
+ },
+ {
+ "epoch": 231.0,
+ "eval_accuracy": 0.9036505999593248,
+ "eval_auc": 0.9289028104284194,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881685041961494,
+ "eval_f1_macro": 0.8155957226774667,
+ "eval_loss": 0.26838722825050354,
+ "eval_pr_auc": 0.677382862396771,
+ "eval_precision": 0.7026209677419355,
+ "eval_precision_macro": 0.821056469972094,
+ "eval_pred_class_0": 16692,
+ "eval_pred_class_1": 2976,
+ "eval_predicted_binding_ratio": 0.1513117754728493,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8104395825248465,
+ "eval_runtime": 0.2471,
+ "eval_samples_per_second": 659.676,
+ "eval_steps_per_second": 4.047,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6006
+ },
+ {
+ "epoch": 232.0,
+ "eval_accuracy": 0.9036505999593248,
+ "eval_auc": 0.9289413705892875,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882710972199375,
+ "eval_f1_macro": 0.8156453059542872,
+ "eval_loss": 0.2681979238986969,
+ "eval_pr_auc": 0.6777962434595076,
+ "eval_precision": 0.7024848891873741,
+ "eval_precision_macro": 0.8210147633474318,
+ "eval_pred_class_0": 16690,
+ "eval_pred_class_1": 2978,
+ "eval_predicted_binding_ratio": 0.1514134634940004,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.8105706403558031,
+ "eval_runtime": 0.2338,
+ "eval_samples_per_second": 697.187,
+ "eval_steps_per_second": 4.277,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6032
+ },
+ {
+ "epoch": 233.0,
+ "eval_accuracy": 0.9039556640227782,
+ "eval_auc": 0.9290857424788171,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6879233437964646,
+ "eval_f1_macro": 0.8155838213438953,
+ "eval_loss": 0.2683457136154175,
+ "eval_pr_auc": 0.6782535995592411,
+ "eval_precision": 0.7052845528455285,
+ "eval_precision_macro": 0.8221624965711252,
+ "eval_pred_class_0": 16716,
+ "eval_pred_class_1": 2952,
+ "eval_predicted_binding_ratio": 0.150091519219036,
+ "eval_recall": 0.6713963237665269,
+ "eval_recall_macro": 0.8094411449218342,
+ "eval_runtime": 0.2259,
+ "eval_samples_per_second": 721.682,
+ "eval_steps_per_second": 4.427,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6058
+ },
+ {
+ "epoch": 234.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.9291390181781085,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881400726792204,
+ "eval_f1_macro": 0.8157063562723066,
+ "eval_loss": 0.26850852370262146,
+ "eval_pr_auc": 0.678201898193761,
+ "eval_precision": 0.7053843548933288,
+ "eval_precision_macro": 0.8222404873479507,
+ "eval_pred_class_0": 16715,
+ "eval_pred_class_1": 2953,
+ "eval_predicted_binding_ratio": 0.15014236322961155,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8096023832320567,
+ "eval_runtime": 0.2554,
+ "eval_samples_per_second": 638.187,
+ "eval_steps_per_second": 3.915,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6084
+ },
+ {
+ "epoch": 235.0,
+ "eval_accuracy": 0.9037014439699004,
+ "eval_auc": 0.9291422688327602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.687767886580943,
+ "eval_f1_macro": 0.8154198615351363,
+ "eval_loss": 0.26855188608169556,
+ "eval_pr_auc": 0.6780321638936206,
+ "eval_precision": 0.7035413153456999,
+ "eval_precision_macro": 0.8213868942770528,
+ "eval_pred_class_0": 16703,
+ "eval_pred_class_1": 2965,
+ "eval_predicted_binding_ratio": 0.1507524913565182,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.80981447384933,
+ "eval_runtime": 0.1711,
+ "eval_samples_per_second": 952.835,
+ "eval_steps_per_second": 5.846,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6110
+ },
+ {
+ "epoch": 236.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9292183185796111,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6881081972620815,
+ "eval_f1_macro": 0.8156376648859622,
+ "eval_loss": 0.2684246003627777,
+ "eval_pr_auc": 0.6783782374353945,
+ "eval_precision": 0.7042538825118163,
+ "eval_precision_macro": 0.8217486340608884,
+ "eval_pred_class_0": 16706,
+ "eval_pred_class_1": 2962,
+ "eval_predicted_binding_ratio": 0.15059995932479153,
+ "eval_recall": 0.672686230248307,
+ "eval_recall_macro": 0.8099050152871281,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.985,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6136
+ },
+ {
+ "epoch": 237.0,
+ "eval_accuracy": 0.9039048200122025,
+ "eval_auc": 0.9291723227895398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891447368421053,
+ "eval_f1_macro": 0.8161564434751782,
+ "eval_loss": 0.2682003080844879,
+ "eval_pr_auc": 0.678474943261759,
+ "eval_precision": 0.7032561262168513,
+ "eval_precision_macro": 0.8214884501897367,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6755885198323122,
+ "eval_recall_macro": 0.8111147162450025,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.44,
+ "eval_steps_per_second": 3.947,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6162
+ },
+ {
+ "epoch": 238.0,
+ "eval_accuracy": 0.9041081960545048,
+ "eval_auc": 0.9292599542101496,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891891891891891,
+ "eval_f1_macro": 0.8162490373023017,
+ "eval_loss": 0.2683660686016083,
+ "eval_pr_auc": 0.6787382085049865,
+ "eval_precision": 0.704752275025278,
+ "eval_precision_macro": 0.8221384271958916,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8107112068382407,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.396,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6188
+ },
+ {
+ "epoch": 239.0,
+ "eval_accuracy": 0.9039556640227782,
+ "eval_auc": 0.9292687329242089,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6886434811274106,
+ "eval_f1_macro": 0.8159319482645679,
+ "eval_loss": 0.2683408558368683,
+ "eval_pr_auc": 0.6788700814485856,
+ "eval_precision": 0.7043155765340526,
+ "eval_precision_macro": 0.8218620153057044,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.673653660109642,
+ "eval_recall_macro": 0.8103585497385295,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.65,
+ "eval_steps_per_second": 3.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6214
+ },
+ {
+ "epoch": 240.0,
+ "eval_accuracy": 0.9038031319910514,
+ "eval_auc": 0.9293738244479479,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6886109282422647,
+ "eval_f1_macro": 0.8158628904590758,
+ "eval_loss": 0.26828694343566895,
+ "eval_pr_auc": 0.6792947923537326,
+ "eval_precision": 0.7031932773109244,
+ "eval_precision_macro": 0.8213743898086402,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.6746210899709771,
+ "eval_recall_macro": 0.8106611817936011,
+ "eval_runtime": 0.1692,
+ "eval_samples_per_second": 963.308,
+ "eval_steps_per_second": 5.91,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6240
+ },
+ {
+ "epoch": 241.0,
+ "eval_accuracy": 0.9039048200122025,
+ "eval_auc": 0.929388987681323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891447368421053,
+ "eval_f1_macro": 0.8161564434751782,
+ "eval_loss": 0.26832982897758484,
+ "eval_pr_auc": 0.679377997729221,
+ "eval_precision": 0.7032561262168513,
+ "eval_precision_macro": 0.8214884501897367,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6755885198323122,
+ "eval_recall_macro": 0.8111147162450025,
+ "eval_runtime": 0.2624,
+ "eval_samples_per_second": 621.159,
+ "eval_steps_per_second": 3.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6266
+ },
+ {
+ "epoch": 242.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.9294373582011398,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6891669410602568,
+ "eval_f1_macro": 0.8162027357577154,
+ "eval_loss": 0.26806166768074036,
+ "eval_pr_auc": 0.6798375856033828,
+ "eval_precision": 0.7040026908846283,
+ "eval_precision_macro": 0.8218126661970311,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6749435665914221,
+ "eval_recall_macro": 0.8109129615416215,
+ "eval_runtime": 0.2508,
+ "eval_samples_per_second": 649.948,
+ "eval_steps_per_second": 3.987,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6292
+ },
+ {
+ "epoch": 243.0,
+ "eval_accuracy": 0.9040065080333537,
+ "eval_auc": 0.929443917905437,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6897798225435425,
+ "eval_f1_macro": 0.8164989338281623,
+ "eval_loss": 0.26814863085746765,
+ "eval_pr_auc": 0.6797263827568155,
+ "eval_precision": 0.7031825795644892,
+ "eval_precision_macro": 0.8215607197408852,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6768784263140922,
+ "eval_recall_macro": 0.8116993085273606,
+ "eval_runtime": 0.2535,
+ "eval_samples_per_second": 642.959,
+ "eval_steps_per_second": 3.945,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6318
+ },
+ {
+ "epoch": 244.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9295684160320964,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882110469909316,
+ "eval_f1_macro": 0.8156873815700654,
+ "eval_loss": 0.26816216111183167,
+ "eval_pr_auc": 0.6803755613590039,
+ "eval_precision": 0.7041160593792173,
+ "eval_precision_macro": 0.8217060181953557,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8100360731180846,
+ "eval_runtime": 0.2375,
+ "eval_samples_per_second": 686.328,
+ "eval_steps_per_second": 4.211,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6344
+ },
+ {
+ "epoch": 245.0,
+ "eval_accuracy": 0.9041081960545048,
+ "eval_auc": 0.9295755402213329,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6888815572418344,
+ "eval_f1_macro": 0.8161003326270482,
+ "eval_loss": 0.2682454288005829,
+ "eval_pr_auc": 0.6803390373338819,
+ "eval_precision": 0.7051671732522796,
+ "eval_precision_macro": 0.8222669528798059,
+ "eval_pred_class_0": 16707,
+ "eval_pred_class_1": 2961,
+ "eval_predicted_binding_ratio": 0.150549115314216,
+ "eval_recall": 0.6733311834891971,
+ "eval_recall_macro": 0.8103180333453712,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 647.048,
+ "eval_steps_per_second": 3.97,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6370
+ },
+ {
+ "epoch": 246.0,
+ "eval_accuracy": 0.903853976001627,
+ "eval_auc": 0.9296204070415253,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6882110469909316,
+ "eval_f1_macro": 0.8156873815700654,
+ "eval_loss": 0.2682053744792938,
+ "eval_pr_auc": 0.6804463740899893,
+ "eval_precision": 0.7041160593792173,
+ "eval_precision_macro": 0.8217060181953557,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.673008706868752,
+ "eval_recall_macro": 0.8100360731180846,
+ "eval_runtime": 0.2411,
+ "eval_samples_per_second": 676.2,
+ "eval_steps_per_second": 4.148,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6396
+ },
+ {
+ "epoch": 247.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9296705294111545,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909270541742137,
+ "eval_f1_macro": 0.817248994423186,
+ "eval_loss": 0.2681281566619873,
+ "eval_pr_auc": 0.6807214505356617,
+ "eval_precision": 0.7059219380888291,
+ "eval_precision_macro": 0.8229238344013863,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6765559496936472,
+ "eval_recall_macro": 0.8119002359683303,
+ "eval_runtime": 0.1963,
+ "eval_samples_per_second": 830.371,
+ "eval_steps_per_second": 5.094,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6422
+ },
+ {
+ "epoch": 248.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9296871135893773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912442396313364,
+ "eval_f1_macro": 0.8174200753177728,
+ "eval_loss": 0.26824310421943665,
+ "eval_pr_auc": 0.6805326752113899,
+ "eval_precision": 0.7058823529411765,
+ "eval_precision_macro": 0.8229585490219571,
+ "eval_pred_class_0": 16693,
+ "eval_pred_class_1": 2975,
+ "eval_predicted_binding_ratio": 0.15126093146227373,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.8121925321095093,
+ "eval_runtime": 0.2645,
+ "eval_samples_per_second": 616.162,
+ "eval_steps_per_second": 3.78,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6448
+ },
+ {
+ "epoch": 249.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.929768866580617,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6914841047603361,
+ "eval_f1_macro": 0.8175893393183914,
+ "eval_loss": 0.2681940495967865,
+ "eval_pr_auc": 0.6810052289277716,
+ "eval_precision": 0.7067340067340068,
+ "eval_precision_macro": 0.8233634101223034,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6768784263140922,
+ "eval_recall_macro": 0.8121520157163508,
+ "eval_runtime": 0.2396,
+ "eval_samples_per_second": 680.397,
+ "eval_steps_per_second": 4.174,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6474
+ },
+ {
+ "epoch": 250.0,
+ "grad_norm": 35924.55078125,
+ "learning_rate": 5.869563021464528e-07,
+ "loss": 0.2171,
+ "step": 6500
+ },
+ {
+ "epoch": 250.0,
+ "eval_accuracy": 0.9048200122025626,
+ "eval_auc": 0.9298367215633461,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912928759894459,
+ "eval_f1_macro": 0.817514675551828,
+ "eval_loss": 0.2681121826171875,
+ "eval_pr_auc": 0.681415067318076,
+ "eval_precision": 0.7073911576105298,
+ "eval_precision_macro": 0.8236147646777582,
+ "eval_pred_class_0": 16705,
+ "eval_pred_class_1": 2963,
+ "eval_predicted_binding_ratio": 0.1506508033353671,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8117890227027473,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.453,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6500
+ },
+ {
+ "epoch": 251.0,
+ "eval_accuracy": 0.9046674801708359,
+ "eval_auc": 0.9298684105799504,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6909510466457887,
+ "eval_f1_macro": 0.8172961371074986,
+ "eval_loss": 0.2681705951690674,
+ "eval_pr_auc": 0.6814913977659953,
+ "eval_precision": 0.7066756574511126,
+ "eval_precision_macro": 0.8232516115060616,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8116984812649493,
+ "eval_runtime": 0.271,
+ "eval_samples_per_second": 601.566,
+ "eval_steps_per_second": 3.691,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6526
+ },
+ {
+ "epoch": 252.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9300244614682288,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6893926857521099,
+ "eval_f1_macro": 0.8165072340543806,
+ "eval_loss": 0.2683667540550232,
+ "eval_pr_auc": 0.6820860933741758,
+ "eval_precision": 0.7080217539089055,
+ "eval_precision_macro": 0.8235792136757251,
+ "eval_pred_class_0": 16726,
+ "eval_pred_class_1": 2942,
+ "eval_predicted_binding_ratio": 0.14958307911328045,
+ "eval_recall": 0.671718800386972,
+ "eval_recall_macro": 0.8099343685039828,
+ "eval_runtime": 0.2316,
+ "eval_samples_per_second": 703.782,
+ "eval_steps_per_second": 4.318,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6552
+ },
+ {
+ "epoch": 253.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9299541441632636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6910408432147562,
+ "eval_f1_macro": 0.8173217684087248,
+ "eval_loss": 0.26824095845222473,
+ "eval_pr_auc": 0.6816317339832768,
+ "eval_precision": 0.7061595422416694,
+ "eval_precision_macro": 0.8230444354317887,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6765559496936472,
+ "eval_recall_macro": 0.8119304164475962,
+ "eval_runtime": 0.2652,
+ "eval_samples_per_second": 614.632,
+ "eval_steps_per_second": 3.771,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6578
+ },
+ {
+ "epoch": 254.0,
+ "eval_accuracy": 0.9044641041285336,
+ "eval_auc": 0.9299257349988078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6913093477903729,
+ "eval_f1_macro": 0.8173981849782266,
+ "eval_loss": 0.2680823504924774,
+ "eval_pr_auc": 0.6815799614400636,
+ "eval_precision": 0.7046215673141326,
+ "eval_precision_macro": 0.8224282755645115,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6784908094163173,
+ "eval_recall_macro": 0.8126262219955371,
+ "eval_runtime": 0.1721,
+ "eval_samples_per_second": 947.131,
+ "eval_steps_per_second": 5.811,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6604
+ },
+ {
+ "epoch": 255.0,
+ "eval_accuracy": 0.9045657921496848,
+ "eval_auc": 0.9300487927156216,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6902129064202014,
+ "eval_f1_macro": 0.8169037907104764,
+ "eval_loss": 0.2682023346424103,
+ "eval_pr_auc": 0.6819970017971004,
+ "eval_precision": 0.7068965517241379,
+ "eval_precision_macro": 0.8232268515652408,
+ "eval_pred_class_0": 16710,
+ "eval_pred_class_1": 2958,
+ "eval_predicted_binding_ratio": 0.1503965832824893,
+ "eval_recall": 0.6742986133505321,
+ "eval_recall_macro": 0.8109828311516347,
+ "eval_runtime": 0.2548,
+ "eval_samples_per_second": 639.732,
+ "eval_steps_per_second": 3.925,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6630
+ },
+ {
+ "epoch": 256.0,
+ "eval_accuracy": 0.9046166361602603,
+ "eval_auc": 0.9300701263533355,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6908371786420567,
+ "eval_f1_macro": 0.8172233266061071,
+ "eval_loss": 0.26801708340644836,
+ "eval_pr_auc": 0.6822973012887885,
+ "eval_precision": 0.7064374789349511,
+ "eval_precision_macro": 0.8231307207859595,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8116683007856833,
+ "eval_runtime": 0.2302,
+ "eval_samples_per_second": 708.13,
+ "eval_steps_per_second": 4.344,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6656
+ },
+ {
+ "epoch": 257.0,
+ "eval_accuracy": 0.9048708562131381,
+ "eval_auc": 0.9301923470752391,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6911012052171042,
+ "eval_f1_macro": 0.8174397819709127,
+ "eval_loss": 0.26805296540260315,
+ "eval_pr_auc": 0.6828138126269635,
+ "eval_precision": 0.7080514208389715,
+ "eval_precision_macro": 0.8238677400987582,
+ "eval_pred_class_0": 16712,
+ "eval_pred_class_1": 2956,
+ "eval_predicted_binding_ratio": 0.1502948952613382,
+ "eval_recall": 0.6749435665914221,
+ "eval_recall_macro": 0.8114260296891438,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.189,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6682
+ },
+ {
+ "epoch": 258.0,
+ "eval_accuracy": 0.9047691681919869,
+ "eval_auc": 0.9301935928351055,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6912806988626998,
+ "eval_f1_macro": 0.8174910212279173,
+ "eval_loss": 0.26811105012893677,
+ "eval_pr_auc": 0.6827059930276215,
+ "eval_precision": 0.7070128118678355,
+ "eval_precision_macro": 0.8234501252489699,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6762334730732021,
+ "eval_recall_macro": 0.8118899000544377,
+ "eval_runtime": 0.2455,
+ "eval_samples_per_second": 663.913,
+ "eval_steps_per_second": 4.073,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6708
+ },
+ {
+ "epoch": 259.0,
+ "eval_accuracy": 0.9049217002237137,
+ "eval_auc": 0.9302954726341887,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6913172664245626,
+ "eval_f1_macro": 0.817561992789179,
+ "eval_loss": 0.2680983245372772,
+ "eval_pr_auc": 0.6832649652047296,
+ "eval_precision": 0.7081501521812648,
+ "eval_precision_macro": 0.8239452215038332,
+ "eval_pred_class_0": 16711,
+ "eval_pred_class_1": 2957,
+ "eval_predicted_binding_ratio": 0.15034573927191378,
+ "eval_recall": 0.6752660432118671,
+ "eval_recall_macro": 0.8115872679993663,
+ "eval_runtime": 0.2456,
+ "eval_samples_per_second": 663.63,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6734
+ },
+ {
+ "epoch": 260.0,
+ "eval_accuracy": 0.9048200122025626,
+ "eval_auc": 0.9302687082620565,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6920039486673247,
+ "eval_f1_macro": 0.8178583697873878,
+ "eval_loss": 0.2680213451385498,
+ "eval_pr_auc": 0.6832868902825516,
+ "eval_precision": 0.7064158548874706,
+ "eval_precision_macro": 0.8233115761166728,
+ "eval_pred_class_0": 16691,
+ "eval_pred_class_1": 2977,
+ "eval_predicted_binding_ratio": 0.15136261948342486,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8127064275194428,
+ "eval_runtime": 0.2584,
+ "eval_samples_per_second": 630.797,
+ "eval_steps_per_second": 3.87,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6760
+ },
+ {
+ "epoch": 261.0,
+ "eval_accuracy": 0.9049725442342892,
+ "eval_auc": 0.9303376240871719,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.691635043722158,
+ "eval_f1_macro": 0.817733374603378,
+ "eval_loss": 0.2681059241294861,
+ "eval_pr_auc": 0.6834436649713198,
+ "eval_precision": 0.7081081081081081,
+ "eval_precision_macro": 0.8239786410782342,
+ "eval_pred_class_0": 16708,
+ "eval_pred_class_1": 2960,
+ "eval_predicted_binding_ratio": 0.15049827130364044,
+ "eval_recall": 0.6759109964527572,
+ "eval_recall_macro": 0.8118795641405453,
+ "eval_runtime": 0.1861,
+ "eval_samples_per_second": 875.956,
+ "eval_steps_per_second": 5.374,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6786
+ },
+ {
+ "epoch": 262.0,
+ "eval_accuracy": 0.9049725442342892,
+ "eval_auc": 0.9303520963131211,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6920415224913494,
+ "eval_f1_macro": 0.8179298603528982,
+ "eval_loss": 0.2680259048938751,
+ "eval_pr_auc": 0.683712511498021,
+ "eval_precision": 0.7075471698113207,
+ "eval_precision_macro": 0.8238035250254208,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6772009029345373,
+ "eval_recall_macro": 0.8124037954643712,
+ "eval_runtime": 0.2544,
+ "eval_samples_per_second": 640.771,
+ "eval_steps_per_second": 3.931,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6812
+ },
+ {
+ "epoch": 263.0,
+ "eval_accuracy": 0.9051759202765914,
+ "eval_auc": 0.9303956005834594,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.692700609655627,
+ "eval_f1_macro": 0.8183195235731167,
+ "eval_loss": 0.2679544985294342,
+ "eval_pr_auc": 0.6840791766505604,
+ "eval_precision": 0.7082210242587601,
+ "eval_precision_macro": 0.8242003324886615,
+ "eval_pred_class_0": 16700,
+ "eval_pred_class_1": 2968,
+ "eval_predicted_binding_ratio": 0.15090502338824485,
+ "eval_recall": 0.6778458561754273,
+ "eval_recall_macro": 0.8127866330433483,
+ "eval_runtime": 0.224,
+ "eval_samples_per_second": 727.59,
+ "eval_steps_per_second": 4.464,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6838
+ },
+ {
+ "epoch": 264.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9304257713302264,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6933465085638999,
+ "eval_f1_macro": 0.8186850387937344,
+ "eval_loss": 0.26790833473205566,
+ "eval_pr_auc": 0.6843453300290927,
+ "eval_precision": 0.70851565129586,
+ "eval_precision_macro": 0.8244321084532245,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.8132703479740159,
+ "eval_runtime": 0.2587,
+ "eval_samples_per_second": 630.122,
+ "eval_steps_per_second": 3.866,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6864
+ },
+ {
+ "epoch": 265.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9304709495903853,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6934474810668423,
+ "eval_f1_macro": 0.8187338421508825,
+ "eval_loss": 0.26806485652923584,
+ "eval_pr_auc": 0.684236710788699,
+ "eval_precision": 0.7083753784056509,
+ "eval_precision_macro": 0.8243883480827296,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6791357626572073,
+ "eval_recall_macro": 0.8134014058049723,
+ "eval_runtime": 0.2519,
+ "eval_samples_per_second": 646.986,
+ "eval_steps_per_second": 3.969,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6890
+ },
+ {
+ "epoch": 266.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9305072226139984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6937767533750412,
+ "eval_f1_macro": 0.8189285426426647,
+ "eval_loss": 0.2682030200958252,
+ "eval_pr_auc": 0.6841419811140891,
+ "eval_precision": 0.708711738984191,
+ "eval_precision_macro": 0.8245864774585525,
+ "eval_pred_class_0": 16695,
+ "eval_pred_class_1": 2973,
+ "eval_predicted_binding_ratio": 0.15115924344112264,
+ "eval_recall": 0.6794582392776524,
+ "eval_recall_macro": 0.8135928245944608,
+ "eval_runtime": 0.1928,
+ "eval_samples_per_second": 845.449,
+ "eval_steps_per_second": 5.187,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6916
+ },
+ {
+ "epoch": 267.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9304825799266392,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6942937016938003,
+ "eval_f1_macro": 0.8191961667392471,
+ "eval_loss": 0.2681148052215576,
+ "eval_pr_auc": 0.6840537331785722,
+ "eval_precision": 0.7083892617449664,
+ "eval_precision_macro": 0.824532598274209,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6807481457594324,
+ "eval_recall_macro": 0.8141472363975528,
+ "eval_runtime": 0.2454,
+ "eval_samples_per_second": 664.157,
+ "eval_steps_per_second": 4.075,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6942
+ },
+ {
+ "epoch": 268.0,
+ "eval_accuracy": 0.9057860484034981,
+ "eval_auc": 0.930527787384295,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6951801283105774,
+ "eval_f1_macro": 0.8197312675109731,
+ "eval_loss": 0.2679014503955841,
+ "eval_pr_auc": 0.6848281199608002,
+ "eval_precision": 0.7095366017461383,
+ "eval_precision_macro": 0.8251697388598875,
+ "eval_pred_class_0": 16690,
+ "eval_pred_class_1": 2978,
+ "eval_predicted_binding_ratio": 0.1514134634940004,
+ "eval_recall": 0.6813930990003225,
+ "eval_recall_macro": 0.8145904349350619,
+ "eval_runtime": 0.2174,
+ "eval_samples_per_second": 749.726,
+ "eval_steps_per_second": 4.6,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6968
+ },
+ {
+ "epoch": 269.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9305513205667733,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6949375410913873,
+ "eval_f1_macro": 0.8195606747920547,
+ "eval_loss": 0.26782992482185364,
+ "eval_pr_auc": 0.6850252128050689,
+ "eval_precision": 0.7086825343613812,
+ "eval_precision_macro": 0.8247638023919581,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8146309513282204,
+ "eval_runtime": 0.2304,
+ "eval_samples_per_second": 707.619,
+ "eval_steps_per_second": 4.341,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 6994
+ },
+ {
+ "epoch": 269.2307692307692,
+ "grad_norm": 17604.1328125,
+ "learning_rate": 5.202671165416819e-07,
+ "loss": 0.2132,
+ "step": 7000
+ },
+ {
+ "epoch": 270.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9305936180072407,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6950377916529741,
+ "eval_f1_macro": 0.8196091213903969,
+ "eval_loss": 0.2680298984050751,
+ "eval_pr_auc": 0.6846741481205671,
+ "eval_precision": 0.7085427135678392,
+ "eval_precision_macro": 0.8247203168031008,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.814762009159177,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.148,
+ "eval_steps_per_second": 3.829,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7020
+ },
+ {
+ "epoch": 271.0,
+ "eval_accuracy": 0.9054809843400448,
+ "eval_auc": 0.9306496966662317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6938909929194796,
+ "eval_f1_macro": 0.8190015347004277,
+ "eval_loss": 0.2681294083595276,
+ "eval_pr_auc": 0.6847333752342389,
+ "eval_precision": 0.7089502018842531,
+ "eval_precision_macro": 0.8247074919339809,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6794582392776524,
+ "eval_recall_macro": 0.8136230050737269,
+ "eval_runtime": 0.1821,
+ "eval_samples_per_second": 895.335,
+ "eval_steps_per_second": 5.493,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7046
+ },
+ {
+ "epoch": 272.0,
+ "eval_accuracy": 0.9053792963188937,
+ "eval_auc": 0.9306815900653141,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6932586121641668,
+ "eval_f1_macro": 0.8186603259504293,
+ "eval_loss": 0.26823949813842773,
+ "eval_pr_auc": 0.6847385438827486,
+ "eval_precision": 0.7090357383681726,
+ "eval_precision_macro": 0.8246412077064189,
+ "eval_pred_class_0": 16702,
+ "eval_pred_class_1": 2966,
+ "eval_predicted_binding_ratio": 0.15080333536709375,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8130384127913689,
+ "eval_runtime": 0.1793,
+ "eval_samples_per_second": 909.129,
+ "eval_steps_per_second": 5.577,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7072
+ },
+ {
+ "epoch": 273.0,
+ "eval_accuracy": 0.905328452308318,
+ "eval_auc": 0.9307212597310633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6931443638760711,
+ "eval_f1_macro": 0.8185873316314347,
+ "eval_loss": 0.2681174576282501,
+ "eval_pr_auc": 0.6852124592316542,
+ "eval_precision": 0.7087967644084934,
+ "eval_precision_macro": 0.8245199318120546,
+ "eval_pred_class_0": 16701,
+ "eval_pred_class_1": 2967,
+ "eval_predicted_binding_ratio": 0.15085417937766932,
+ "eval_recall": 0.6781683327958723,
+ "eval_recall_macro": 0.8130082323121028,
+ "eval_runtime": 0.1693,
+ "eval_samples_per_second": 962.904,
+ "eval_steps_per_second": 5.907,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7098
+ },
+ {
+ "epoch": 274.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9307878592214269,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6947368421052632,
+ "eval_f1_macro": 0.8194636820581644,
+ "eval_loss": 0.2679351270198822,
+ "eval_pr_auc": 0.685659065605018,
+ "eval_precision": 0.7089627391742196,
+ "eval_precision_macro": 0.8248510741829513,
+ "eval_pred_class_0": 16689,
+ "eval_pred_class_1": 2979,
+ "eval_predicted_binding_ratio": 0.15146430750457596,
+ "eval_recall": 0.6810706223798775,
+ "eval_recall_macro": 0.8143688356663075,
+ "eval_runtime": 0.2474,
+ "eval_samples_per_second": 658.933,
+ "eval_steps_per_second": 4.043,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7124
+ },
+ {
+ "epoch": 275.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9308354511413273,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6945496459739832,
+ "eval_f1_macro": 0.8193909880953703,
+ "eval_loss": 0.2679973542690277,
+ "eval_pr_auc": 0.6858486839853987,
+ "eval_precision": 0.7096231493943472,
+ "eval_precision_macro": 0.8251038602745574,
+ "eval_pred_class_0": 16696,
+ "eval_pred_class_1": 2972,
+ "eval_predicted_binding_ratio": 0.15110839943054707,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.8140058426527039,
+ "eval_runtime": 0.1976,
+ "eval_samples_per_second": 825.097,
+ "eval_steps_per_second": 5.062,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7150
+ },
+ {
+ "epoch": 276.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9308506727696961,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.694320987654321,
+ "eval_f1_macro": 0.819244917025501,
+ "eval_loss": 0.26807495951652527,
+ "eval_pr_auc": 0.6856600891550617,
+ "eval_precision": 0.7091459314055144,
+ "eval_precision_macro": 0.8248616921913159,
+ "eval_pred_class_0": 16694,
+ "eval_pred_class_1": 2974,
+ "eval_predicted_binding_ratio": 0.1512100874516982,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.8139454816941719,
+ "eval_runtime": 0.2211,
+ "eval_samples_per_second": 737.174,
+ "eval_steps_per_second": 4.523,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7176
+ },
+ {
+ "epoch": 277.0,
+ "eval_accuracy": 0.9057352043929225,
+ "eval_auc": 0.9308575828439557,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6946640316205533,
+ "eval_f1_macro": 0.8194640504423113,
+ "eval_loss": 0.26810142397880554,
+ "eval_pr_auc": 0.6855507532370865,
+ "eval_precision": 0.709861999326826,
+ "eval_precision_macro": 0.8252250644654733,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6801031925185425,
+ "eval_recall_macro": 0.81403602313197,
+ "eval_runtime": 0.2072,
+ "eval_samples_per_second": 786.583,
+ "eval_steps_per_second": 4.826,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7202
+ },
+ {
+ "epoch": 278.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9309145374278527,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6941467436108821,
+ "eval_f1_macro": 0.8191962415719043,
+ "eval_loss": 0.26818612217903137,
+ "eval_pr_auc": 0.6856281742604067,
+ "eval_precision": 0.7101889338731444,
+ "eval_precision_macro": 0.8252812485457677,
+ "eval_pred_class_0": 16704,
+ "eval_pred_class_1": 2964,
+ "eval_predicted_binding_ratio": 0.15070164734594266,
+ "eval_recall": 0.6788132860367624,
+ "eval_recall_macro": 0.813481611328878,
+ "eval_runtime": 0.1872,
+ "eval_samples_per_second": 870.561,
+ "eval_steps_per_second": 5.341,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7228
+ },
+ {
+ "epoch": 279.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.9309855846702396,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.69433465085639,
+ "eval_f1_macro": 0.8192692975301671,
+ "eval_loss": 0.2680346667766571,
+ "eval_pr_auc": 0.686232632159634,
+ "eval_precision": 0.7095254123190845,
+ "eval_precision_macro": 0.825026825462411,
+ "eval_pred_class_0": 16697,
+ "eval_pred_class_1": 2971,
+ "eval_predicted_binding_ratio": 0.15105755541997154,
+ "eval_recall": 0.6797807158980974,
+ "eval_recall_macro": 0.8138446043424814,
+ "eval_runtime": 0.241,
+ "eval_samples_per_second": 676.212,
+ "eval_steps_per_second": 4.149,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7254
+ },
+ {
+ "epoch": 280.0,
+ "eval_accuracy": 0.9056335163717714,
+ "eval_auc": 0.930922634866985,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6957377049180328,
+ "eval_f1_macro": 0.8199473215888755,
+ "eval_loss": 0.26806843280792236,
+ "eval_pr_auc": 0.6857403383581059,
+ "eval_precision": 0.70756918972991,
+ "eval_precision_macro": 0.8244187060893835,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8156794139758723,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.751,
+ "eval_steps_per_second": 5.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7280
+ },
+ {
+ "epoch": 281.0,
+ "eval_accuracy": 0.9055826723611958,
+ "eval_auc": 0.9309180411274773,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6959227116423776,
+ "eval_f1_macro": 0.8200188959217034,
+ "eval_loss": 0.2680058181285858,
+ "eval_pr_auc": 0.6857513786045211,
+ "eval_precision": 0.7069194943446441,
+ "eval_precision_macro": 0.8241715464761271,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8160424069894758,
+ "eval_runtime": 0.2526,
+ "eval_samples_per_second": 645.248,
+ "eval_steps_per_second": 3.959,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7306
+ },
+ {
+ "epoch": 282.0,
+ "eval_accuracy": 0.9055318283506203,
+ "eval_auc": 0.930996368279084,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6950098489822718,
+ "eval_f1_macro": 0.8195600321797414,
+ "eval_loss": 0.2679016888141632,
+ "eval_pr_auc": 0.6864832067617813,
+ "eval_precision": 0.7077900367769977,
+ "eval_precision_macro": 0.824393309448042,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.814963763862558,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.664,
+ "eval_steps_per_second": 3.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7332
+ },
+ {
+ "epoch": 283.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.9310520771031147,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695152013147083,
+ "eval_f1_macro": 0.8196821086456596,
+ "eval_loss": 0.2679460644721985,
+ "eval_pr_auc": 0.6867185372000254,
+ "eval_precision": 0.7087801608579088,
+ "eval_precision_macro": 0.8248408116684653,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.8147921896384429,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.949,
+ "eval_steps_per_second": 4.994,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7358
+ },
+ {
+ "epoch": 284.0,
+ "eval_accuracy": 0.905684360382347,
+ "eval_auc": 0.931065877786636,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695152013147083,
+ "eval_f1_macro": 0.8196821086456596,
+ "eval_loss": 0.26795604825019836,
+ "eval_pr_auc": 0.6868264245068394,
+ "eval_precision": 0.7087801608579088,
+ "eval_precision_macro": 0.8248408116684653,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6820380522412125,
+ "eval_recall_macro": 0.8147921896384429,
+ "eval_runtime": 0.2632,
+ "eval_samples_per_second": 619.283,
+ "eval_steps_per_second": 3.799,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7384
+ },
+ {
+ "epoch": 285.0,
+ "eval_accuracy": 0.9057352043929225,
+ "eval_auc": 0.9310428312291054,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696165191740413,
+ "eval_f1_macro": 0.8201894743681304,
+ "eval_loss": 0.2679717540740967,
+ "eval_pr_auc": 0.6865300625458848,
+ "eval_precision": 0.7077640786404532,
+ "eval_precision_macro": 0.8245726255085029,
+ "eval_pred_class_0": 16667,
+ "eval_pred_class_1": 3001,
+ "eval_predicted_binding_ratio": 0.15258287573723817,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8160018905963173,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.133,
+ "eval_steps_per_second": 4.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7410
+ },
+ {
+ "epoch": 286.0,
+ "eval_accuracy": 0.9054301403294692,
+ "eval_auc": 0.9311142093764568,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6946815495732108,
+ "eval_f1_macro": 0.8193658018591599,
+ "eval_loss": 0.2679450213909149,
+ "eval_pr_auc": 0.6869273130451355,
+ "eval_precision": 0.7074557004346372,
+ "eval_precision_macro": 0.8241961598653369,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8147723450730694,
+ "eval_runtime": 0.1793,
+ "eval_samples_per_second": 908.844,
+ "eval_steps_per_second": 5.576,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7436
+ },
+ {
+ "epoch": 287.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9311615190538873,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.695695037791653,
+ "eval_f1_macro": 0.8199978948356761,
+ "eval_loss": 0.2679760158061981,
+ "eval_pr_auc": 0.6872164392300586,
+ "eval_precision": 0.7092127303182579,
+ "eval_precision_macro": 0.8251152664358777,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.8151448467381539,
+ "eval_runtime": 0.1776,
+ "eval_samples_per_second": 917.947,
+ "eval_steps_per_second": 5.632,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7462
+ },
+ {
+ "epoch": 288.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9311948528628156,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6959237343852729,
+ "eval_f1_macro": 0.8201439915761322,
+ "eval_loss": 0.26804664731025696,
+ "eval_pr_auc": 0.6873402512916988,
+ "eval_precision": 0.7096882333221589,
+ "eval_precision_macro": 0.8253565529811274,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.815205207696686,
+ "eval_runtime": 0.2679,
+ "eval_samples_per_second": 608.503,
+ "eval_steps_per_second": 3.733,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7488
+ },
+ {
+ "epoch": 288.46153846153845,
+ "grad_norm": 18250.5078125,
+ "learning_rate": 4.5321317063898914e-07,
+ "loss": 0.2101,
+ "step": 7500
+ },
+ {
+ "epoch": 289.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9312627857055362,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6958949096880132,
+ "eval_f1_macro": 0.8200944800500465,
+ "eval_loss": 0.26791396737098694,
+ "eval_pr_auc": 0.6879250403674073,
+ "eval_precision": 0.7089327534292406,
+ "eval_precision_macro": 0.8250281609942534,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.8154069624000669,
+ "eval_runtime": 0.2277,
+ "eval_samples_per_second": 715.925,
+ "eval_steps_per_second": 4.392,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7514
+ },
+ {
+ "epoch": 290.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9312241087546806,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696522309711286,
+ "eval_f1_macro": 0.8204332366847645,
+ "eval_loss": 0.26787513494491577,
+ "eval_pr_auc": 0.6878638814996979,
+ "eval_precision": 0.7088480801335559,
+ "eval_precision_macro": 0.8250951850316913,
+ "eval_pred_class_0": 16673,
+ "eval_pred_class_1": 2995,
+ "eval_predicted_binding_ratio": 0.15227781167378482,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.815991554682425,
+ "eval_runtime": 0.2212,
+ "eval_samples_per_second": 736.836,
+ "eval_steps_per_second": 4.52,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7540
+ },
+ {
+ "epoch": 291.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9312868249779602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975369458128079,
+ "eval_f1_macro": 0.8210658921448086,
+ "eval_loss": 0.26807519793510437,
+ "eval_pr_auc": 0.6876156626538744,
+ "eval_precision": 0.7106055536968886,
+ "eval_precision_macro": 0.8260144502101566,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8163640563475095,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.522,
+ "eval_steps_per_second": 3.874,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7566
+ },
+ {
+ "epoch": 292.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9313246454689077,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971935007385525,
+ "eval_f1_macro": 0.8208465473190101,
+ "eval_loss": 0.26810526847839355,
+ "eval_pr_auc": 0.6877564948921628,
+ "eval_precision": 0.7098930481283422,
+ "eval_precision_macro": 0.8256529284776994,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8162735149097116,
+ "eval_runtime": 0.206,
+ "eval_samples_per_second": 791.322,
+ "eval_steps_per_second": 4.855,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7592
+ },
+ {
+ "epoch": 293.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9313187962370344,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6969647251845775,
+ "eval_f1_macro": 0.8207004065741184,
+ "eval_loss": 0.26795074343681335,
+ "eval_pr_auc": 0.6879453119558532,
+ "eval_precision": 0.7094188376753507,
+ "eval_precision_macro": 0.8254123095657551,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8162131539511794,
+ "eval_runtime": 0.2264,
+ "eval_samples_per_second": 719.954,
+ "eval_steps_per_second": 4.417,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7618
+ },
+ {
+ "epoch": 294.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9313842959550158,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966809069996713,
+ "eval_f1_macro": 0.820581055003595,
+ "eval_loss": 0.2678394019603729,
+ "eval_pr_auc": 0.6884837475836854,
+ "eval_precision": 0.7102177554438861,
+ "eval_precision_macro": 0.8257076908850431,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8157191031066195,
+ "eval_runtime": 0.1788,
+ "eval_samples_per_second": 911.642,
+ "eval_steps_per_second": 5.593,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7644
+ },
+ {
+ "epoch": 295.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9314417858263554,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696337658071933,
+ "eval_f1_macro": 0.8203618088536944,
+ "eval_loss": 0.26788315176963806,
+ "eval_pr_auc": 0.6885944465147925,
+ "eval_precision": 0.7095046854082999,
+ "eval_precision_macro": 0.8253458678840061,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156285616688215,
+ "eval_runtime": 0.2717,
+ "eval_samples_per_second": 599.832,
+ "eval_steps_per_second": 3.68,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7670
+ },
+ {
+ "epoch": 296.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9314500389854711,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696636587366694,
+ "eval_f1_macro": 0.8205062543343502,
+ "eval_loss": 0.2678987681865692,
+ "eval_pr_auc": 0.6885305487751676,
+ "eval_precision": 0.7090848363393454,
+ "eval_precision_macro": 0.8252153220919469,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.816021735161691,
+ "eval_runtime": 0.2551,
+ "eval_samples_per_second": 639.002,
+ "eval_steps_per_second": 3.92,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7696
+ },
+ {
+ "epoch": 297.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.931550575699698,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965959546127282,
+ "eval_f1_macro": 0.8205577878611678,
+ "eval_loss": 0.2679198086261749,
+ "eval_pr_auc": 0.6889351423284887,
+ "eval_precision": 0.710738255033557,
+ "eval_precision_macro": 0.8259168264621285,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.8154871679239726,
+ "eval_runtime": 0.2409,
+ "eval_samples_per_second": 676.584,
+ "eval_steps_per_second": 4.151,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7722
+ },
+ {
+ "epoch": 298.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9316071409836368,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6964256300444738,
+ "eval_f1_macro": 0.82051102635547,
+ "eval_loss": 0.2680239677429199,
+ "eval_pr_auc": 0.6888956424322248,
+ "eval_precision": 0.7117845117845117,
+ "eval_precision_macro": 0.8263378182350514,
+ "eval_pred_class_0": 16698,
+ "eval_pred_class_1": 2970,
+ "eval_predicted_binding_ratio": 0.15100671140939598,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8150232975586785,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.263,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7748
+ },
+ {
+ "epoch": 299.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9315966104197652,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6972235912600624,
+ "eval_f1_macro": 0.8208966763783243,
+ "eval_loss": 0.26803261041641235,
+ "eval_pr_auc": 0.6887732742755047,
+ "eval_precision": 0.7106496985934361,
+ "eval_precision_macro": 0.8259818448607991,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8160717602063305,
+ "eval_runtime": 0.2484,
+ "eval_samples_per_second": 656.12,
+ "eval_steps_per_second": 4.025,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7774
+ },
+ {
+ "epoch": 300.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9315960653998236,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966218432272876,
+ "eval_f1_macro": 0.8204813289787078,
+ "eval_loss": 0.26792290806770325,
+ "eval_pr_auc": 0.6890570542847262,
+ "eval_precision": 0.7087087087087087,
+ "eval_precision_macro": 0.8250519729735134,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8161226125133815,
+ "eval_runtime": 0.2626,
+ "eval_samples_per_second": 620.694,
+ "eval_steps_per_second": 3.808,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7800
+ },
+ {
+ "epoch": 301.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9316357350655727,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966957093539372,
+ "eval_f1_macro": 0.8206059967994839,
+ "eval_loss": 0.26809555292129517,
+ "eval_pr_auc": 0.6889378655811479,
+ "eval_precision": 0.710596914822267,
+ "eval_precision_macro": 0.8258725914156881,
+ "eval_pred_class_0": 16686,
+ "eval_pred_class_1": 2982,
+ "eval_predicted_binding_ratio": 0.15161683953630262,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815618225754929,
+ "eval_runtime": 0.2178,
+ "eval_samples_per_second": 748.303,
+ "eval_steps_per_second": 4.591,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7826
+ },
+ {
+ "epoch": 302.0,
+ "eval_accuracy": 0.9061419564775269,
+ "eval_auc": 0.9317000084886855,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965811965811965,
+ "eval_f1_macro": 0.8205328694321837,
+ "eval_loss": 0.26798245310783386,
+ "eval_pr_auc": 0.6894273728032447,
+ "eval_precision": 0.7103586992960107,
+ "eval_precision_macro": 0.8257517200405735,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815588045275663,
+ "eval_runtime": 0.2561,
+ "eval_samples_per_second": 636.477,
+ "eval_steps_per_second": 3.905,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7852
+ },
+ {
+ "epoch": 303.0,
+ "eval_accuracy": 0.9060402684563759,
+ "eval_auc": 0.9316820812256066,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966513460275772,
+ "eval_f1_macro": 0.8205311837826491,
+ "eval_loss": 0.26780617237091064,
+ "eval_pr_auc": 0.6897785082602487,
+ "eval_precision": 0.7094617184887997,
+ "eval_precision_macro": 0.8253790573615671,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159208578100005,
+ "eval_runtime": 0.1902,
+ "eval_samples_per_second": 856.826,
+ "eval_steps_per_second": 5.257,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7878
+ },
+ {
+ "epoch": 304.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9317363691047894,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.26783081889152527,
+ "eval_pr_auc": 0.690108350201664,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.745,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7904
+ },
+ {
+ "epoch": 305.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318277475374976,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6962670613385956,
+ "eval_f1_macro": 0.8203632705580364,
+ "eval_loss": 0.267810195684433,
+ "eval_pr_auc": 0.6906198234983021,
+ "eval_precision": 0.7104026845637584,
+ "eval_precision_macro": 0.82571907957814,
+ "eval_pred_class_0": 16688,
+ "eval_pred_class_1": 2980,
+ "eval_predicted_binding_ratio": 0.15151515151515152,
+ "eval_recall": 0.6826830054821026,
+ "eval_recall_macro": 0.8152957491344841,
+ "eval_runtime": 0.2457,
+ "eval_samples_per_second": 663.513,
+ "eval_steps_per_second": 4.071,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7930
+ },
+ {
+ "epoch": 306.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318495094051658,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965664530967636,
+ "eval_f1_macro": 0.8205079551116469,
+ "eval_loss": 0.26770758628845215,
+ "eval_pr_auc": 0.6907229577841941,
+ "eval_precision": 0.709979906229069,
+ "eval_precision_macro": 0.8255870038278783,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156889226273535,
+ "eval_runtime": 0.2472,
+ "eval_samples_per_second": 659.459,
+ "eval_steps_per_second": 4.046,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7956
+ },
+ {
+ "epoch": 307.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9318431735483448,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6973381531383503,
+ "eval_f1_macro": 0.8209698284488745,
+ "eval_loss": 0.26784345507621765,
+ "eval_pr_auc": 0.6903028236900399,
+ "eval_precision": 0.7108877721943049,
+ "eval_precision_macro": 0.8261026405178202,
+ "eval_pred_class_0": 16683,
+ "eval_pred_class_1": 2985,
+ "eval_predicted_binding_ratio": 0.15176937156802928,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8161019406855965,
+ "eval_runtime": 0.2339,
+ "eval_samples_per_second": 696.845,
+ "eval_steps_per_second": 4.275,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 7982
+ },
+ {
+ "epoch": 307.6923076923077,
+ "grad_norm": 18753.48046875,
+ "learning_rate": 3.8700127731844033e-07,
+ "loss": 0.2071,
+ "step": 8000
+ },
+ {
+ "epoch": 308.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9318915635331595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6966661192314009,
+ "eval_f1_macro": 0.8205561173351938,
+ "eval_loss": 0.2679731547832489,
+ "eval_pr_auc": 0.6902808443840289,
+ "eval_precision": 0.7098393574297188,
+ "eval_precision_macro": 0.8255431799139001,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.81581998045831,
+ "eval_runtime": 0.2227,
+ "eval_samples_per_second": 731.952,
+ "eval_steps_per_second": 4.491,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8008
+ },
+ {
+ "epoch": 309.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9319105808361218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975217462662071,
+ "eval_f1_macro": 0.821040751603759,
+ "eval_loss": 0.2678290605545044,
+ "eval_pr_auc": 0.6904266123808676,
+ "eval_precision": 0.7102272727272727,
+ "eval_precision_macro": 0.8258500239865676,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8164649336992,
+ "eval_runtime": 0.2365,
+ "eval_samples_per_second": 689.086,
+ "eval_steps_per_second": 4.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8034
+ },
+ {
+ "epoch": 310.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9319563819762139,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6977201902575036,
+ "eval_f1_macro": 0.821136637744354,
+ "eval_loss": 0.2677942216396332,
+ "eval_pr_auc": 0.6905770539125178,
+ "eval_precision": 0.7099465954606141,
+ "eval_precision_macro": 0.8257626451391362,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6859077716865527,
+ "eval_recall_macro": 0.816727049361113,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 672.991,
+ "eval_steps_per_second": 4.129,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8060
+ },
+ {
+ "epoch": 311.0,
+ "eval_accuracy": 0.9064470205409803,
+ "eval_auc": 0.9320261250637406,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975673898750822,
+ "eval_f1_macro": 0.8211161862162611,
+ "eval_loss": 0.2678627669811249,
+ "eval_pr_auc": 0.6908896136415948,
+ "eval_precision": 0.7113643982567884,
+ "eval_precision_macro": 0.8263444706297427,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8161623016441286,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.505,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8086
+ },
+ {
+ "epoch": 312.0,
+ "eval_accuracy": 0.9060402684563759,
+ "eval_auc": 0.9320056478859348,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.696551724137931,
+ "eval_f1_macro": 0.8204830448879512,
+ "eval_loss": 0.2678248882293701,
+ "eval_pr_auc": 0.6908301522653787,
+ "eval_precision": 0.7096018735362998,
+ "eval_precision_macro": 0.8254226766806146,
+ "eval_pred_class_0": 16679,
+ "eval_pred_class_1": 2989,
+ "eval_predicted_binding_ratio": 0.1519727476103315,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815789799979044,
+ "eval_runtime": 0.2616,
+ "eval_samples_per_second": 623.086,
+ "eval_steps_per_second": 3.823,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8112
+ },
+ {
+ "epoch": 313.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9319988935316585,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965370096832431,
+ "eval_f1_macro": 0.8204581387495119,
+ "eval_loss": 0.2678254544734955,
+ "eval_pr_auc": 0.6908227024589884,
+ "eval_precision": 0.7092245989304813,
+ "eval_precision_macro": 0.8252587374599636,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8158906773307344,
+ "eval_runtime": 0.2589,
+ "eval_samples_per_second": 629.545,
+ "eval_steps_per_second": 3.862,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8138
+ },
+ {
+ "epoch": 314.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9320198184044164,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6971634694212165,
+ "eval_f1_macro": 0.8207964351950081,
+ "eval_loss": 0.2678711414337158,
+ "eval_pr_auc": 0.6909501280556708,
+ "eval_precision": 0.7091394262841895,
+ "eval_precision_macro": 0.8253255619723288,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6855852950661077,
+ "eval_recall_macro": 0.8164752696130925,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.201,
+ "eval_steps_per_second": 3.768,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8164
+ },
+ {
+ "epoch": 315.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9320753325784678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.2678229808807373,
+ "eval_pr_auc": 0.6914105766155315,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.375,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8190
+ },
+ {
+ "epoch": 316.0,
+ "eval_accuracy": 0.9061928004881025,
+ "eval_auc": 0.9321298151076297,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967953985209532,
+ "eval_f1_macro": 0.8206541727499956,
+ "eval_loss": 0.26796844601631165,
+ "eval_pr_auc": 0.6912288183485439,
+ "eval_precision": 0.710455764075067,
+ "eval_precision_macro": 0.8258284574391159,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8157492835858855,
+ "eval_runtime": 0.2237,
+ "eval_samples_per_second": 728.567,
+ "eval_steps_per_second": 4.47,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8216
+ },
+ {
+ "epoch": 317.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9321722877330785,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6956235603817045,
+ "eval_f1_macro": 0.8199989231339035,
+ "eval_loss": 0.2681267261505127,
+ "eval_pr_auc": 0.6912998271961284,
+ "eval_precision": 0.7101108498488411,
+ "eval_precision_macro": 0.8254885924997606,
+ "eval_pred_class_0": 16691,
+ "eval_pred_class_1": 2977,
+ "eval_predicted_binding_ratio": 0.15136261948342486,
+ "eval_recall": 0.6817155756207675,
+ "eval_recall_macro": 0.8148120342038165,
+ "eval_runtime": 0.1877,
+ "eval_samples_per_second": 868.412,
+ "eval_steps_per_second": 5.328,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8242
+ },
+ {
+ "epoch": 318.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9321751880177678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965664530967636,
+ "eval_f1_macro": 0.8205079551116469,
+ "eval_loss": 0.26805615425109863,
+ "eval_pr_auc": 0.6913470747613989,
+ "eval_precision": 0.709979906229069,
+ "eval_precision_macro": 0.8255870038278783,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6836504353434376,
+ "eval_recall_macro": 0.8156889226273535,
+ "eval_runtime": 0.2426,
+ "eval_samples_per_second": 671.932,
+ "eval_steps_per_second": 4.122,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8268
+ },
+ {
+ "epoch": 319.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9321843949617812,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6961380443714051,
+ "eval_f1_macro": 0.8202653471082613,
+ "eval_loss": 0.267932653427124,
+ "eval_pr_auc": 0.6916487257323465,
+ "eval_precision": 0.7097855227882037,
+ "eval_precision_macro": 0.8254333991308556,
+ "eval_pred_class_0": 16684,
+ "eval_pred_class_1": 2984,
+ "eval_predicted_binding_ratio": 0.15171852755745374,
+ "eval_recall": 0.6830054821025475,
+ "eval_recall_macro": 0.8153664460069084,
+ "eval_runtime": 0.2569,
+ "eval_samples_per_second": 634.38,
+ "eval_steps_per_second": 3.892,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8294
+ },
+ {
+ "epoch": 320.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.932188093311385,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6964373666064686,
+ "eval_f1_macro": 0.8204099902666875,
+ "eval_loss": 0.2679634094238281,
+ "eval_pr_auc": 0.6915655068510385,
+ "eval_precision": 0.7093645484949833,
+ "eval_precision_macro": 0.8253022526621696,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815759619499778,
+ "eval_runtime": 0.2483,
+ "eval_samples_per_second": 656.448,
+ "eval_steps_per_second": 4.027,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8320
+ },
+ {
+ "epoch": 321.0,
+ "eval_accuracy": 0.9058877364246491,
+ "eval_auc": 0.9321906237611137,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965076242006887,
+ "eval_f1_macro": 0.8204083386821658,
+ "eval_loss": 0.26790735125541687,
+ "eval_pr_auc": 0.6917736045731879,
+ "eval_precision": 0.7084723148765844,
+ "eval_precision_macro": 0.8249320182661266,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6849403418252177,
+ "eval_recall_macro": 0.8160924320341154,
+ "eval_runtime": 0.2593,
+ "eval_samples_per_second": 628.581,
+ "eval_steps_per_second": 3.856,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8346
+ },
+ {
+ "epoch": 322.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.932275423024527,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6955950032873109,
+ "eval_f1_macro": 0.8199495526481063,
+ "eval_loss": 0.2680682837963104,
+ "eval_pr_auc": 0.6919499023976591,
+ "eval_precision": 0.709353000335233,
+ "eval_precision_macro": 0.8251589694514043,
+ "eval_pred_class_0": 16685,
+ "eval_pred_class_1": 2983,
+ "eval_predicted_binding_ratio": 0.15166768354687818,
+ "eval_recall": 0.6823605288616575,
+ "eval_recall_macro": 0.8150137889071974,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.703,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8372
+ },
+ {
+ "epoch": 323.0,
+ "eval_accuracy": 0.9059894244458003,
+ "eval_auc": 0.9322442887603632,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6965370096832431,
+ "eval_f1_macro": 0.8204581387495119,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6918309607756195,
+ "eval_precision": 0.7092245989304813,
+ "eval_precision_macro": 0.8252587374599636,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8158906773307344,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.368,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8398
+ },
+ {
+ "epoch": 324.0,
+ "eval_accuracy": 0.9058368924140736,
+ "eval_auc": 0.9322356073712934,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6960945191992124,
+ "eval_f1_macro": 0.8201909332654507,
+ "eval_loss": 0.2680003046989441,
+ "eval_pr_auc": 0.6918708088537314,
+ "eval_precision": 0.7086535248914133,
+ "eval_precision_macro": 0.8249414550993799,
+ "eval_pred_class_0": 16675,
+ "eval_pred_class_1": 2993,
+ "eval_predicted_binding_ratio": 0.15217612365263372,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.81566907806198,
+ "eval_runtime": 0.1979,
+ "eval_samples_per_second": 823.473,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8424
+ },
+ {
+ "epoch": 325.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9322670238779269,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6961235216819974,
+ "eval_f1_macro": 0.8202405385118361,
+ "eval_loss": 0.26794886589050293,
+ "eval_pr_auc": 0.6921960622354616,
+ "eval_precision": 0.7094074322062269,
+ "eval_precision_macro": 0.8252690299332196,
+ "eval_pred_class_0": 16681,
+ "eval_pred_class_1": 2987,
+ "eval_predicted_binding_ratio": 0.1518710595891804,
+ "eval_recall": 0.6833279587229926,
+ "eval_recall_macro": 0.815467323358599,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.5,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8450
+ },
+ {
+ "epoch": 326.0,
+ "eval_accuracy": 0.9059385804352247,
+ "eval_auc": 0.9322930096501425,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6963230466185161,
+ "eval_f1_macro": 0.8203369534620676,
+ "eval_loss": 0.2680268883705139,
+ "eval_pr_auc": 0.6921477872574622,
+ "eval_precision": 0.7091273821464393,
+ "eval_precision_macro": 0.8251819077788622,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6839729119638827,
+ "eval_recall_macro": 0.815729439020512,
+ "eval_runtime": 0.1733,
+ "eval_samples_per_second": 940.367,
+ "eval_steps_per_second": 5.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8476
+ },
+ {
+ "epoch": 326.9230769230769,
+ "grad_norm": 17241.076171875,
+ "learning_rate": 3.2282309449959705e-07,
+ "loss": 0.2047,
+ "step": 8500
+ },
+ {
+ "epoch": 327.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9323445529646195,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967657199146281,
+ "eval_f1_macro": 0.820604246632002,
+ "eval_loss": 0.26804205775260925,
+ "eval_pr_auc": 0.6924439463998024,
+ "eval_precision": 0.7096989966555184,
+ "eval_precision_macro": 0.8254994563562998,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159510382892665,
+ "eval_runtime": 0.1771,
+ "eval_samples_per_second": 920.47,
+ "eval_steps_per_second": 5.647,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8502
+ },
+ {
+ "epoch": 328.0,
+ "eval_accuracy": 0.9060911124669514,
+ "eval_auc": 0.9323743441439272,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6967657199146281,
+ "eval_f1_macro": 0.820604246632002,
+ "eval_loss": 0.26802849769592285,
+ "eval_pr_auc": 0.6925977669253861,
+ "eval_precision": 0.7096989966555184,
+ "eval_precision_macro": 0.8254994563562998,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6842953885843276,
+ "eval_recall_macro": 0.8159510382892665,
+ "eval_runtime": 0.1678,
+ "eval_samples_per_second": 971.421,
+ "eval_steps_per_second": 5.96,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8528
+ },
+ {
+ "epoch": 329.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.9323637649175607,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983295119554537,
+ "eval_f1_macro": 0.8214488366277419,
+ "eval_loss": 0.2680566608905792,
+ "eval_pr_auc": 0.6924500045026715,
+ "eval_precision": 0.7094841930116472,
+ "eval_precision_macro": 0.825665699698526,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8174125189951615,
+ "eval_runtime": 0.2683,
+ "eval_samples_per_second": 607.54,
+ "eval_steps_per_second": 3.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8554
+ },
+ {
+ "epoch": 330.0,
+ "eval_accuracy": 0.906243644498678,
+ "eval_auc": 0.9324242037360844,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6974072858549393,
+ "eval_f1_macro": 0.8209676462967013,
+ "eval_loss": 0.26804018020629883,
+ "eval_pr_auc": 0.6928556723686659,
+ "eval_precision": 0.7099899766120948,
+ "eval_precision_macro": 0.8257296209897056,
+ "eval_pred_class_0": 16675,
+ "eval_pred_class_1": 2993,
+ "eval_predicted_binding_ratio": 0.15217612365263372,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8164347532199341,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.585,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8580
+ },
+ {
+ "epoch": 331.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9323838917254041,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6981162981162982,
+ "eval_f1_macro": 0.8213280175544326,
+ "eval_loss": 0.2678423821926117,
+ "eval_pr_auc": 0.6929480254197629,
+ "eval_precision": 0.7093874833555259,
+ "eval_precision_macro": 0.8255890849326837,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.817251280684939,
+ "eval_runtime": 0.2474,
+ "eval_samples_per_second": 658.814,
+ "eval_steps_per_second": 4.042,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8606
+ },
+ {
+ "epoch": 332.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9323530591687079,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993784756297023,
+ "eval_f1_macro": 0.8220268454242666,
+ "eval_loss": 0.26779934763908386,
+ "eval_pr_auc": 0.6928104051729911,
+ "eval_precision": 0.7095917690009956,
+ "eval_precision_macro": 0.8258856473344816,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183195878979646,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.815,
+ "eval_steps_per_second": 3.766,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8632
+ },
+ {
+ "epoch": 333.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.932432009200248,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983450761920367,
+ "eval_f1_macro": 0.8214741659959972,
+ "eval_loss": 0.2679860591888428,
+ "eval_pr_auc": 0.6929996618872077,
+ "eval_precision": 0.7098600932711525,
+ "eval_precision_macro": 0.8258288825890143,
+ "eval_pred_class_0": 16666,
+ "eval_pred_class_1": 3002,
+ "eval_predicted_binding_ratio": 0.1526337197478137,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.817311641643471,
+ "eval_runtime": 0.1802,
+ "eval_samples_per_second": 904.69,
+ "eval_steps_per_second": 5.55,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8658
+ },
+ {
+ "epoch": 334.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9325279911049631,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6975521603417119,
+ "eval_f1_macro": 0.8210910370116632,
+ "eval_loss": 0.26808273792266846,
+ "eval_pr_auc": 0.6933573725062909,
+ "eval_precision": 0.7109845947756196,
+ "eval_precision_macro": 0.8261792653772595,
+ "eval_pred_class_0": 16682,
+ "eval_pred_class_1": 2986,
+ "eval_predicted_binding_ratio": 0.15182021557860484,
+ "eval_recall": 0.6846178652047726,
+ "eval_recall_macro": 0.816263178995819,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.721,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8684
+ },
+ {
+ "epoch": 335.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.932545276023111,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6976362442547603,
+ "eval_f1_macro": 0.8211138747443938,
+ "eval_loss": 0.26816996932029724,
+ "eval_pr_auc": 0.6932853419412803,
+ "eval_precision": 0.710464727515881,
+ "eval_precision_macro": 0.8259705061096825,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.816495114178466,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.649,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8710
+ },
+ {
+ "epoch": 336.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9325305604846879,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982772764561116,
+ "eval_f1_macro": 0.821477015533191,
+ "eval_loss": 0.2680180072784424,
+ "eval_pr_auc": 0.6935243427052671,
+ "eval_precision": 0.7107548430193721,
+ "eval_precision_macro": 0.8262002594609874,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169788291091336,
+ "eval_runtime": 0.264,
+ "eval_samples_per_second": 617.515,
+ "eval_steps_per_second": 3.788,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8736
+ },
+ {
+ "epoch": 337.0,
+ "eval_accuracy": 0.9063453325198292,
+ "eval_auc": 0.932509878924404,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.698032786885246,
+ "eval_f1_macro": 0.8213054775682699,
+ "eval_loss": 0.2678954601287842,
+ "eval_pr_auc": 0.6935428752602653,
+ "eval_precision": 0.7099033011003668,
+ "eval_precision_macro": 0.8257957323787274,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.817019345502292,
+ "eval_runtime": 0.2614,
+ "eval_samples_per_second": 623.491,
+ "eval_steps_per_second": 3.825,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8762
+ },
+ {
+ "epoch": 338.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9325608674864403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989176779271893,
+ "eval_f1_macro": 0.8218398486513014,
+ "eval_loss": 0.26806220412254333,
+ "eval_pr_auc": 0.6935170525264562,
+ "eval_precision": 0.7110443777110443,
+ "eval_precision_macro": 0.8264297528888735,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6871976781683328,
+ "eval_recall_macro": 0.8174625440398011,
+ "eval_runtime": 0.1767,
+ "eval_samples_per_second": 922.3,
+ "eval_steps_per_second": 5.658,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8788
+ },
+ {
+ "epoch": 339.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9325807801793065,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990163934426229,
+ "eval_f1_macro": 0.8218875444165816,
+ "eval_loss": 0.26803991198539734,
+ "eval_pr_auc": 0.6936355510877774,
+ "eval_precision": 0.7109036345448483,
+ "eval_precision_macro": 0.8263858865027319,
+ "eval_pred_class_0": 16669,
+ "eval_pred_class_1": 2999,
+ "eval_predicted_binding_ratio": 0.15248118771608704,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8175936018707576,
+ "eval_runtime": 0.257,
+ "eval_samples_per_second": 634.235,
+ "eval_steps_per_second": 3.891,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8814
+ },
+ {
+ "epoch": 340.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9326051308916972,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983762506150566,
+ "eval_f1_macro": 0.8215248382050282,
+ "eval_loss": 0.26792144775390625,
+ "eval_pr_auc": 0.6939264625162457,
+ "eval_precision": 0.7106141522029372,
+ "eval_precision_macro": 0.8261564043164398,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.8171098869400901,
+ "eval_runtime": 0.2744,
+ "eval_samples_per_second": 593.925,
+ "eval_steps_per_second": 3.644,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8840
+ },
+ {
+ "epoch": 341.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9325905710732574,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6980482204362801,
+ "eval_f1_macro": 0.821330737974691,
+ "eval_loss": 0.26793381571769714,
+ "eval_pr_auc": 0.6938155556406347,
+ "eval_precision": 0.7102803738317757,
+ "eval_precision_macro": 0.825959524727788,
+ "eval_pred_class_0": 16672,
+ "eval_pred_class_1": 2996,
+ "eval_predicted_binding_ratio": 0.15232865568436038,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169184681506015,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.704,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8866
+ },
+ {
+ "epoch": 342.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9325911160931989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997542997542997,
+ "eval_f1_macro": 0.8222974802915219,
+ "eval_loss": 0.26800957322120667,
+ "eval_pr_auc": 0.6936597261010234,
+ "eval_precision": 0.711051930758988,
+ "eval_precision_macro": 0.8265713326382553,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8182083746323816,
+ "eval_runtime": 0.1766,
+ "eval_samples_per_second": 923.118,
+ "eval_steps_per_second": 5.663,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8892
+ },
+ {
+ "epoch": 343.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9326288587241547,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982772764561116,
+ "eval_f1_macro": 0.821477015533191,
+ "eval_loss": 0.2680290937423706,
+ "eval_pr_auc": 0.6938544134850919,
+ "eval_precision": 0.7107548430193721,
+ "eval_precision_macro": 0.8262002594609874,
+ "eval_pred_class_0": 16674,
+ "eval_pred_class_1": 2994,
+ "eval_predicted_binding_ratio": 0.15222696766320928,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8169788291091336,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.353,
+ "eval_steps_per_second": 3.916,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8918
+ },
+ {
+ "epoch": 344.0,
+ "eval_accuracy": 0.9064470205409803,
+ "eval_auc": 0.9326493359019604,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6982617251557888,
+ "eval_f1_macro": 0.8214517001734176,
+ "eval_loss": 0.2680736482143402,
+ "eval_pr_auc": 0.6938105747317047,
+ "eval_precision": 0.710377043710377,
+ "eval_precision_macro": 0.8260361014844849,
+ "eval_pred_class_0": 16671,
+ "eval_pred_class_1": 2997,
+ "eval_predicted_binding_ratio": 0.15237949969493594,
+ "eval_recall": 0.6865527249274428,
+ "eval_recall_macro": 0.8170797064608241,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.605,
+ "eval_steps_per_second": 3.869,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8944
+ },
+ {
+ "epoch": 345.0,
+ "eval_accuracy": 0.9063961765304047,
+ "eval_auc": 0.9326923535473509,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6977507798391068,
+ "eval_f1_macro": 0.8211870157279457,
+ "eval_loss": 0.2681424021720886,
+ "eval_pr_auc": 0.6938928037843046,
+ "eval_precision": 0.7107023411371237,
+ "eval_precision_macro": 0.8260910674386901,
+ "eval_pred_class_0": 16678,
+ "eval_pred_class_1": 2990,
+ "eval_predicted_binding_ratio": 0.15202359162090706,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.816525294657732,
+ "eval_runtime": 0.2453,
+ "eval_samples_per_second": 664.523,
+ "eval_steps_per_second": 4.077,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8970
+ },
+ {
+ "epoch": 346.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9327347288478101,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6979799638692724,
+ "eval_f1_macro": 0.821333351261192,
+ "eval_loss": 0.2682046592235565,
+ "eval_pr_auc": 0.6939995353512864,
+ "eval_precision": 0.7111780455153949,
+ "eval_precision_macro": 0.8263324280334768,
+ "eval_pred_class_0": 16680,
+ "eval_pred_class_1": 2988,
+ "eval_predicted_binding_ratio": 0.15192190359975594,
+ "eval_recall": 0.6852628184456627,
+ "eval_recall_macro": 0.8165856556162641,
+ "eval_runtime": 0.2656,
+ "eval_samples_per_second": 613.642,
+ "eval_steps_per_second": 3.765,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 8996
+ },
+ {
+ "epoch": 346.15384615384613,
+ "grad_norm": 18666.783203125,
+ "learning_rate": 2.618336781094791e-07,
+ "loss": 0.2031,
+ "step": 9000
+ },
+ {
+ "epoch": 347.0,
+ "eval_accuracy": 0.9062944885092536,
+ "eval_auc": 0.9326942611171465,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6983139630054018,
+ "eval_f1_macro": 0.8214235117341392,
+ "eval_loss": 0.2681069076061249,
+ "eval_pr_auc": 0.6938781737586003,
+ "eval_precision": 0.7091090425531915,
+ "eval_precision_macro": 0.8255029006283365,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6878426314092229,
+ "eval_recall_macro": 0.817513396346852,
+ "eval_runtime": 0.1933,
+ "eval_samples_per_second": 843.456,
+ "eval_steps_per_second": 5.175,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9022
+ },
+ {
+ "epoch": 348.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9326940859321652,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.26795056462287903,
+ "eval_pr_auc": 0.6940722215736992,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.287,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9048
+ },
+ {
+ "epoch": 349.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9327572303853989,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991310050828005,
+ "eval_f1_macro": 0.8219607247335354,
+ "eval_loss": 0.267974317073822,
+ "eval_pr_auc": 0.6943572217621294,
+ "eval_precision": 0.7111407605070047,
+ "eval_precision_macro": 0.8265061930909349,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8176237823500236,
+ "eval_runtime": 0.2638,
+ "eval_samples_per_second": 617.819,
+ "eval_steps_per_second": 3.79,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9074
+ },
+ {
+ "epoch": 350.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9327859996523161,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6985064828491712,
+ "eval_f1_macro": 0.8216233644580062,
+ "eval_loss": 0.2680947780609131,
+ "eval_pr_auc": 0.6943020020881897,
+ "eval_precision": 0.7112299465240641,
+ "eval_precision_macro": 0.8264413105131714,
+ "eval_pred_class_0": 16676,
+ "eval_pred_class_1": 2992,
+ "eval_predicted_binding_ratio": 0.15212527964205816,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8170391900676656,
+ "eval_runtime": 0.2408,
+ "eval_samples_per_second": 676.86,
+ "eval_steps_per_second": 4.153,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9100
+ },
+ {
+ "epoch": 351.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9328183504788495,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6986211424819435,
+ "eval_f1_macro": 0.8216965657061384,
+ "eval_loss": 0.2681223750114441,
+ "eval_pr_auc": 0.694450730809475,
+ "eval_precision": 0.7114677365429622,
+ "eval_precision_macro": 0.8265619548577976,
+ "eval_pred_class_0": 16677,
+ "eval_pred_class_1": 2991,
+ "eval_predicted_binding_ratio": 0.15207443563148262,
+ "eval_recall": 0.6862302483069977,
+ "eval_recall_macro": 0.8170693705469316,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.919,
+ "eval_steps_per_second": 3.901,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9126
+ },
+ {
+ "epoch": 352.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9328099415997506,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991310050828005,
+ "eval_f1_macro": 0.8219607247335354,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6945293388795055,
+ "eval_precision": 0.7111407605070047,
+ "eval_precision_macro": 0.8265061930909349,
+ "eval_pred_class_0": 16670,
+ "eval_pred_class_1": 2998,
+ "eval_predicted_binding_ratio": 0.15243034370551148,
+ "eval_recall": 0.6875201547887778,
+ "eval_recall_macro": 0.8176237823500236,
+ "eval_runtime": 0.1729,
+ "eval_samples_per_second": 942.612,
+ "eval_steps_per_second": 5.783,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9152
+ },
+ {
+ "epoch": 353.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9328013186056745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000819000819001,
+ "eval_f1_macro": 0.8224913728389398,
+ "eval_loss": 0.26793336868286133,
+ "eval_pr_auc": 0.6945633927009858,
+ "eval_precision": 0.7113848202396804,
+ "eval_precision_macro": 0.8267677821793697,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8183997934218701,
+ "eval_runtime": 0.2553,
+ "eval_samples_per_second": 638.431,
+ "eval_steps_per_second": 3.917,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9178
+ },
+ {
+ "epoch": 354.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.932783060437631,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26802781224250793,
+ "eval_pr_auc": 0.694307607889245,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.039,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9204
+ },
+ {
+ "epoch": 355.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9327856687473517,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997875469848014,
+ "eval_f1_macro": 0.8222422697443199,
+ "eval_loss": 0.2679577171802521,
+ "eval_pr_auc": 0.6944152708065687,
+ "eval_precision": 0.7094102054340623,
+ "eval_precision_macro": 0.8258762738882024,
+ "eval_pred_class_0": 16650,
+ "eval_pred_class_1": 3018,
+ "eval_predicted_binding_ratio": 0.15344722391702256,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8187429418701,
+ "eval_runtime": 0.1868,
+ "eval_samples_per_second": 872.379,
+ "eval_steps_per_second": 5.352,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9230
+ },
+ {
+ "epoch": 356.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9328543023299973,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26801130175590515,
+ "eval_pr_auc": 0.6946173571124205,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1679,
+ "eval_samples_per_second": 971.014,
+ "eval_steps_per_second": 5.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9256
+ },
+ {
+ "epoch": 357.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9328912663610364,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002947920078612,
+ "eval_f1_macro": 0.8226120363891247,
+ "eval_loss": 0.26803824305534363,
+ "eval_pr_auc": 0.6948368351435844,
+ "eval_precision": 0.7114808652246256,
+ "eval_precision_macro": 0.8268440754137292,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8185610317320926,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.12,
+ "eval_steps_per_second": 3.841,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9282
+ },
+ {
+ "epoch": 358.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9328766286826047,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003929273084479,
+ "eval_f1_macro": 0.8226594466805872,
+ "eval_loss": 0.2680445909500122,
+ "eval_pr_auc": 0.6947543185413181,
+ "eval_precision": 0.711340206185567,
+ "eval_precision_macro": 0.8268002873554328,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186920895630492,
+ "eval_runtime": 0.2229,
+ "eval_samples_per_second": 731.38,
+ "eval_steps_per_second": 4.487,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9308
+ },
+ {
+ "epoch": 359.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9328791980623294,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002617801047121,
+ "eval_f1_macro": 0.8225604590386311,
+ "eval_loss": 0.2679673135280609,
+ "eval_pr_auc": 0.694877850403851,
+ "eval_precision": 0.7107273331119229,
+ "eval_precision_macro": 0.8265169354519211,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187627864354736,
+ "eval_runtime": 0.1766,
+ "eval_samples_per_second": 922.941,
+ "eval_steps_per_second": 5.662,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9334
+ },
+ {
+ "epoch": 360.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9328811640271188,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2680214047431946,
+ "eval_pr_auc": 0.6947522638420174,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.2235,
+ "eval_samples_per_second": 729.222,
+ "eval_steps_per_second": 4.474,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9360
+ },
+ {
+ "epoch": 361.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.932900882070006,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000163478829492,
+ "eval_f1_macro": 0.8223884382480462,
+ "eval_loss": 0.26796379685401917,
+ "eval_pr_auc": 0.6949176089109205,
+ "eval_precision": 0.7098806366047745,
+ "eval_precision_macro": 0.8261149519800236,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188033028286321,
+ "eval_runtime": 0.1694,
+ "eval_samples_per_second": 962.09,
+ "eval_steps_per_second": 5.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9386
+ },
+ {
+ "epoch": 362.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9329447561753046,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005723630417008,
+ "eval_f1_macro": 0.8227283114988764,
+ "eval_loss": 0.26795387268066406,
+ "eval_pr_auc": 0.6950891938157696,
+ "eval_precision": 0.7106834771068348,
+ "eval_precision_macro": 0.8265498567232265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190550825766527,
+ "eval_runtime": 0.2571,
+ "eval_samples_per_second": 633.968,
+ "eval_steps_per_second": 3.889,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9412
+ },
+ {
+ "epoch": 363.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9329731556072616,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001636661211129,
+ "eval_f1_macro": 0.8225130616165066,
+ "eval_loss": 0.26798126101493835,
+ "eval_pr_auc": 0.6952376733836106,
+ "eval_precision": 0.7108673978065803,
+ "eval_precision_macro": 0.8265604171937038,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818631728604517,
+ "eval_runtime": 0.2635,
+ "eval_samples_per_second": 618.549,
+ "eval_steps_per_second": 3.795,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9438
+ },
+ {
+ "epoch": 364.0,
+ "eval_accuracy": 0.9070063046573114,
+ "eval_auc": 0.9329669754704237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008994276369583,
+ "eval_f1_macro": 0.8229219452383643,
+ "eval_loss": 0.26792433857917786,
+ "eval_pr_auc": 0.6952423959524336,
+ "eval_precision": 0.7110152621101526,
+ "eval_precision_macro": 0.8267457720422265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8192465013661412,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.085,
+ "eval_steps_per_second": 3.847,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9464
+ },
+ {
+ "epoch": 365.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9329833552661686,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003598298985934,
+ "eval_f1_macro": 0.8226078241660808,
+ "eval_loss": 0.2679993808269501,
+ "eval_pr_auc": 0.6952272078548911,
+ "eval_precision": 0.7105874543644208,
+ "eval_precision_macro": 0.8264735530603251,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188938442664302,
+ "eval_runtime": 0.2598,
+ "eval_samples_per_second": 627.317,
+ "eval_steps_per_second": 3.849,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9490
+ },
+ {
+ "epoch": 365.38461538461536,
+ "grad_norm": 18768.416015625,
+ "learning_rate": 2.0513069380006943e-07,
+ "loss": 0.2014,
+ "step": 9500
+ },
+ {
+ "epoch": 366.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330138179879044,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003763704794632,
+ "eval_f1_macro": 0.822633632944773,
+ "eval_loss": 0.26798829436302185,
+ "eval_pr_auc": 0.6953770669982303,
+ "eval_precision": 0.7109634551495017,
+ "eval_precision_macro": 0.8266367281750631,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187929669147396,
+ "eval_runtime": 0.198,
+ "eval_samples_per_second": 823.426,
+ "eval_steps_per_second": 5.052,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9516
+ },
+ {
+ "epoch": 367.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9330281442263691,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005890052356021,
+ "eval_f1_macro": 0.8227541703278901,
+ "eval_loss": 0.2679848372936249,
+ "eval_pr_auc": 0.6954512195878219,
+ "eval_precision": 0.7110594486881435,
+ "eval_precision_macro": 0.8267130106501293,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189542052249621,
+ "eval_runtime": 0.265,
+ "eval_samples_per_second": 615.184,
+ "eval_steps_per_second": 3.774,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9542
+ },
+ {
+ "epoch": 368.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330502564639999,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2679852545261383,
+ "eval_pr_auc": 0.6955032656447977,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2572,
+ "eval_samples_per_second": 633.651,
+ "eval_steps_per_second": 3.887,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9568
+ },
+ {
+ "epoch": 369.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9330669768972081,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2680181562900543,
+ "eval_pr_auc": 0.6955153529738297,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2378,
+ "eval_samples_per_second": 685.421,
+ "eval_steps_per_second": 4.205,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9594
+ },
+ {
+ "epoch": 370.0,
+ "eval_accuracy": 0.9070063046573114,
+ "eval_auc": 0.9330907631246593,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7008015704236872,
+ "eval_f1_macro": 0.8228746737830142,
+ "eval_loss": 0.26799651980400085,
+ "eval_pr_auc": 0.6956635287744017,
+ "eval_precision": 0.7111553784860558,
+ "eval_precision_macro": 0.8267892646512892,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8191154435351846,
+ "eval_runtime": 0.1746,
+ "eval_samples_per_second": 933.529,
+ "eval_steps_per_second": 5.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9620
+ },
+ {
+ "epoch": 371.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9331252940309591,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999672453324599,
+ "eval_f1_macro": 0.8224181697622275,
+ "eval_loss": 0.26801252365112305,
+ "eval_pr_auc": 0.6958269523241747,
+ "eval_precision": 0.7111480865224625,
+ "eval_precision_macro": 0.8266476794611952,
+ "eval_pred_class_0": 16663,
+ "eval_pred_class_1": 3005,
+ "eval_predicted_binding_ratio": 0.15278625177954036,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8183696129426041,
+ "eval_runtime": 0.2596,
+ "eval_samples_per_second": 627.791,
+ "eval_steps_per_second": 3.851,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9646
+ },
+ {
+ "epoch": 372.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9331262672808548,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7005723630417008,
+ "eval_f1_macro": 0.8227283114988764,
+ "eval_loss": 0.26803725957870483,
+ "eval_pr_auc": 0.6957878915651574,
+ "eval_precision": 0.7106834771068348,
+ "eval_precision_macro": 0.8265498567232265,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190550825766527,
+ "eval_runtime": 0.2654,
+ "eval_samples_per_second": 614.266,
+ "eval_steps_per_second": 3.769,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9672
+ },
+ {
+ "epoch": 373.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9331573528825239,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26807889342308044,
+ "eval_pr_auc": 0.6959044832644976,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2659,
+ "eval_samples_per_second": 613.113,
+ "eval_steps_per_second": 3.761,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9698
+ },
+ {
+ "epoch": 374.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9331759224905339,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6996235063021771,
+ "eval_f1_macro": 0.8221986674075668,
+ "eval_loss": 0.2681734561920166,
+ "eval_pr_auc": 0.695850524773773,
+ "eval_precision": 0.710438829787234,
+ "eval_precision_macro": 0.8262878422645654,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.818279071504806,
+ "eval_runtime": 0.2603,
+ "eval_samples_per_second": 626.284,
+ "eval_steps_per_second": 3.842,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9724
+ },
+ {
+ "epoch": 375.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933143698186487,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2681812345981598,
+ "eval_pr_auc": 0.6956723886102156,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.1783,
+ "eval_samples_per_second": 914.221,
+ "eval_steps_per_second": 5.609,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9750
+ },
+ {
+ "epoch": 376.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9331405935193198,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002123835974514,
+ "eval_f1_macro": 0.8224831299290885,
+ "eval_loss": 0.2681320309638977,
+ "eval_pr_auc": 0.6957066245846253,
+ "eval_precision": 0.7096026490066225,
+ "eval_precision_macro": 0.8260290996114323,
+ "eval_pred_class_0": 16648,
+ "eval_pred_class_1": 3020,
+ "eval_predicted_binding_ratio": 0.1535489119381737,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.819065418490545,
+ "eval_runtime": 0.2617,
+ "eval_samples_per_second": 622.75,
+ "eval_steps_per_second": 3.821,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9776
+ },
+ {
+ "epoch": 377.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9331510656881976,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7010443864229765,
+ "eval_f1_macro": 0.8229384784439624,
+ "eval_loss": 0.2682173550128937,
+ "eval_pr_auc": 0.6955658216411763,
+ "eval_precision": 0.709613478691774,
+ "eval_precision_macro": 0.8261726428372638,
+ "eval_pred_class_0": 16641,
+ "eval_pred_class_1": 3027,
+ "eval_predicted_binding_ratio": 0.15390482001220257,
+ "eval_recall": 0.6926797807158981,
+ "eval_recall_macro": 0.8198112490831255,
+ "eval_runtime": 0.2446,
+ "eval_samples_per_second": 666.285,
+ "eval_steps_per_second": 4.088,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9802
+ },
+ {
+ "epoch": 378.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9331855381995037,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994767822105952,
+ "eval_f1_macro": 0.8220743333087894,
+ "eval_loss": 0.2681441009044647,
+ "eval_pr_auc": 0.6959256270686769,
+ "eval_precision": 0.709452736318408,
+ "eval_precision_macro": 0.8258426835378145,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8184506457289211,
+ "eval_runtime": 0.2608,
+ "eval_samples_per_second": 625.04,
+ "eval_steps_per_second": 3.835,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9828
+ },
+ {
+ "epoch": 379.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9331895674540718,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003267973856209,
+ "eval_f1_macro": 0.8225562214288413,
+ "eval_loss": 0.26809969544410706,
+ "eval_pr_auc": 0.6959934558189067,
+ "eval_precision": 0.7098376946008612,
+ "eval_precision_macro": 0.8261483505739005,
+ "eval_pred_class_0": 16649,
+ "eval_pred_class_1": 3019,
+ "eval_predicted_binding_ratio": 0.15349806792759813,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8190955989698111,
+ "eval_runtime": 0.2612,
+ "eval_samples_per_second": 624.005,
+ "eval_steps_per_second": 3.828,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9854
+ },
+ {
+ "epoch": 380.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332175581210724,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994928840176673,
+ "eval_f1_macro": 0.8220999320609059,
+ "eval_loss": 0.26810789108276367,
+ "eval_pr_auc": 0.6960299382483204,
+ "eval_precision": 0.7098273572377158,
+ "eval_precision_macro": 0.8260051771779358,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183497683772306,
+ "eval_runtime": 0.1782,
+ "eval_samples_per_second": 914.547,
+ "eval_steps_per_second": 5.611,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9880
+ },
+ {
+ "epoch": 381.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332336167443518,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.2680502235889435,
+ "eval_pr_auc": 0.6962912618518755,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2534,
+ "eval_samples_per_second": 643.169,
+ "eval_steps_per_second": 3.946,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9906
+ },
+ {
+ "epoch": 382.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332394951737218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26803064346313477,
+ "eval_pr_auc": 0.696271279785013,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.1775,
+ "eval_samples_per_second": 918.497,
+ "eval_steps_per_second": 5.635,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9932
+ },
+ {
+ "epoch": 383.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332396703587031,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2680654227733612,
+ "eval_pr_auc": 0.6962410474404584,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2478,
+ "eval_samples_per_second": 657.85,
+ "eval_steps_per_second": 4.036,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9958
+ },
+ {
+ "epoch": 384.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332586000691747,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995090016366612,
+ "eval_f1_macro": 0.8221255355501671,
+ "eval_loss": 0.26804468035697937,
+ "eval_pr_auc": 0.6963737898708651,
+ "eval_precision": 0.7102027251578598,
+ "eval_precision_macro": 0.8261680532566416,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182488910255401,
+ "eval_runtime": 0.1642,
+ "eval_samples_per_second": 992.895,
+ "eval_steps_per_second": 6.091,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 9984
+ },
+ {
+ "epoch": 384.61538461538464,
+ "grad_norm": 19506.416015625,
+ "learning_rate": 1.5373466155541264e-07,
+ "loss": 0.1999,
+ "step": 10000
+ },
+ {
+ "epoch": 385.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332885664334637,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990990990990991,
+ "eval_f1_macro": 0.8219096951966862,
+ "eval_loss": 0.26817384362220764,
+ "eval_pr_auc": 0.6963813810367415,
+ "eval_precision": 0.7103861517976032,
+ "eval_precision_macro": 0.8261784335560267,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.8178255370534045,
+ "eval_runtime": 0.2609,
+ "eval_samples_per_second": 624.846,
+ "eval_steps_per_second": 3.833,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10010
+ },
+ {
+ "epoch": 386.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9332890919884074,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995251350908793,
+ "eval_f1_macro": 0.8221511437890823,
+ "eval_loss": 0.2681432366371155,
+ "eval_pr_auc": 0.6963686032935126,
+ "eval_precision": 0.7105788423153693,
+ "eval_precision_macro": 0.8263313128873689,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181480136738496,
+ "eval_runtime": 0.2677,
+ "eval_samples_per_second": 609.003,
+ "eval_steps_per_second": 3.736,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10036
+ },
+ {
+ "epoch": 387.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332816368942063,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.2681044936180115,
+ "eval_pr_auc": 0.6963340299375516,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2503,
+ "eval_samples_per_second": 651.154,
+ "eval_steps_per_second": 3.995,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10062
+ },
+ {
+ "epoch": 388.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9332853547088078,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26804089546203613,
+ "eval_pr_auc": 0.6964477494759991,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.349,
+ "eval_steps_per_second": 3.794,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10088
+ },
+ {
+ "epoch": 389.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333026785569515,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.26805874705314636,
+ "eval_pr_auc": 0.6965416515768459,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.1895,
+ "eval_samples_per_second": 860.093,
+ "eval_steps_per_second": 5.277,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10114
+ },
+ {
+ "epoch": 390.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9332940944928713,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26800334453582764,
+ "eval_pr_auc": 0.6965549091166009,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2601,
+ "eval_samples_per_second": 626.7,
+ "eval_steps_per_second": 3.845,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10140
+ },
+ {
+ "epoch": 391.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9332915835081403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699591169255928,
+ "eval_f1_macro": 0.8221474102804127,
+ "eval_loss": 0.26801303029060364,
+ "eval_pr_auc": 0.69649915263674,
+ "eval_precision": 0.7096881220968813,
+ "eval_precision_macro": 0.8259621107662262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8184808262081871,
+ "eval_runtime": 0.2322,
+ "eval_samples_per_second": 702.068,
+ "eval_steps_per_second": 4.307,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10166
+ },
+ {
+ "epoch": 392.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9333394576705105,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680598199367523,
+ "eval_pr_auc": 0.6966844521188784,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2619,
+ "eval_samples_per_second": 622.281,
+ "eval_steps_per_second": 3.818,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10192
+ },
+ {
+ "epoch": 393.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333311363839021,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26812389492988586,
+ "eval_pr_auc": 0.696579843318821,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2606,
+ "eval_samples_per_second": 625.459,
+ "eval_steps_per_second": 3.837,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10218
+ },
+ {
+ "epoch": 394.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9333747574442278,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990990990990991,
+ "eval_f1_macro": 0.8219096951966862,
+ "eval_loss": 0.26815417408943176,
+ "eval_pr_auc": 0.6966758102563304,
+ "eval_precision": 0.7103861517976032,
+ "eval_precision_macro": 0.8261784335560267,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6881651080296678,
+ "eval_recall_macro": 0.8178255370534045,
+ "eval_runtime": 0.2489,
+ "eval_samples_per_second": 654.797,
+ "eval_steps_per_second": 4.017,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10244
+ },
+ {
+ "epoch": 395.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.93337473797923,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26812419295310974,
+ "eval_pr_auc": 0.6967071460749926,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.2546,
+ "eval_samples_per_second": 640.225,
+ "eval_steps_per_second": 3.928,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10270
+ },
+ {
+ "epoch": 396.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333609762257046,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6966270582247817,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1654,
+ "eval_samples_per_second": 985.392,
+ "eval_steps_per_second": 6.045,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10296
+ },
+ {
+ "epoch": 397.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9333962857319209,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.268099308013916,
+ "eval_pr_auc": 0.6968255966064625,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.1974,
+ "eval_samples_per_second": 825.595,
+ "eval_steps_per_second": 5.065,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10322
+ },
+ {
+ "epoch": 398.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9334254248337986,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6987714987714988,
+ "eval_f1_macro": 0.8217158026492684,
+ "eval_loss": 0.26809167861938477,
+ "eval_pr_auc": 0.6970114900505864,
+ "eval_precision": 0.7100532623169108,
+ "eval_precision_macro": 0.8259819840149124,
+ "eval_pred_class_0": 16664,
+ "eval_pred_class_1": 3004,
+ "eval_predicted_binding_ratio": 0.15273540776896483,
+ "eval_recall": 0.6878426314092229,
+ "eval_recall_macro": 0.8176341182639161,
+ "eval_runtime": 0.1847,
+ "eval_samples_per_second": 882.621,
+ "eval_steps_per_second": 5.415,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10348
+ },
+ {
+ "epoch": 399.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334154587548664,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994928840176673,
+ "eval_f1_macro": 0.8220999320609059,
+ "eval_loss": 0.26815035939216614,
+ "eval_pr_auc": 0.6968381832516852,
+ "eval_precision": 0.7098273572377158,
+ "eval_precision_macro": 0.8260051771779358,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183497683772306,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.281,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10374
+ },
+ {
+ "epoch": 400.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334343592678412,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6991976420501065,
+ "eval_f1_macro": 0.821957303073866,
+ "eval_loss": 0.26815110445022583,
+ "eval_pr_auc": 0.6969254623478301,
+ "eval_precision": 0.7102461743180306,
+ "eval_precision_macro": 0.826134970486347,
+ "eval_pred_class_0": 16662,
+ "eval_pred_class_1": 3006,
+ "eval_predicted_binding_ratio": 0.15283709579011592,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.817956594884361,
+ "eval_runtime": 0.2607,
+ "eval_samples_per_second": 625.319,
+ "eval_steps_per_second": 3.836,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10400
+ },
+ {
+ "epoch": 401.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334508655860725,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6992961204779833,
+ "eval_f1_macro": 0.8220048784892098,
+ "eval_loss": 0.2681412398815155,
+ "eval_pr_auc": 0.6970213943546584,
+ "eval_precision": 0.7101063829787234,
+ "eval_precision_macro": 0.8260916068555082,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8180876527153176,
+ "eval_runtime": 0.2502,
+ "eval_samples_per_second": 651.598,
+ "eval_steps_per_second": 3.998,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10426
+ },
+ {
+ "epoch": 402.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334578048578289,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6996235063021771,
+ "eval_f1_macro": 0.8221986674075668,
+ "eval_loss": 0.26816073060035706,
+ "eval_pr_auc": 0.697032431219364,
+ "eval_precision": 0.710438829787234,
+ "eval_precision_macro": 0.8262878422645654,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.818279071504806,
+ "eval_runtime": 0.2131,
+ "eval_samples_per_second": 764.767,
+ "eval_steps_per_second": 4.692,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10452
+ },
+ {
+ "epoch": 403.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9334644910846125,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993945344460808,
+ "eval_f1_macro": 0.8220524214743572,
+ "eval_loss": 0.2681373655796051,
+ "eval_pr_auc": 0.6971117657586055,
+ "eval_precision": 0.7099667774086379,
+ "eval_precision_macro": 0.8260483424802825,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182187105462742,
+ "eval_runtime": 0.1917,
+ "eval_samples_per_second": 850.439,
+ "eval_steps_per_second": 5.217,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10478
+ },
+ {
+ "epoch": 403.84615384615387,
+ "grad_norm": 20065.328125,
+ "learning_rate": 1.0857058873879127e-07,
+ "loss": 0.1991,
+ "step": 10500
+ },
+ {
+ "epoch": 404.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9334760435608745,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6994106090373281,
+ "eval_f1_macro": 0.8220780022434744,
+ "eval_loss": 0.26816511154174805,
+ "eval_pr_auc": 0.6971952135976213,
+ "eval_precision": 0.71034253408713,
+ "eval_precision_macro": 0.8262114206958068,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6888100612705579,
+ "eval_recall_macro": 0.8181178331945835,
+ "eval_runtime": 0.2491,
+ "eval_samples_per_second": 654.311,
+ "eval_steps_per_second": 4.014,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10504
+ },
+ {
+ "epoch": 405.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334837419675497,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.26816821098327637,
+ "eval_pr_auc": 0.6972179050703514,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.2552,
+ "eval_samples_per_second": 638.823,
+ "eval_steps_per_second": 3.919,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10530
+ },
+ {
+ "epoch": 406.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9334893478869489,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997218131238749,
+ "eval_f1_macro": 0.8222461586311625,
+ "eval_loss": 0.2681950330734253,
+ "eval_pr_auc": 0.6972103120395237,
+ "eval_precision": 0.7102990033222591,
+ "eval_precision_macro": 0.8262444710452093,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184101293357626,
+ "eval_runtime": 0.2003,
+ "eval_samples_per_second": 813.636,
+ "eval_steps_per_second": 4.992,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10556
+ },
+ {
+ "epoch": 407.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334909050867821,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998363338788871,
+ "eval_f1_macro": 0.822319298583337,
+ "eval_loss": 0.2681744396686554,
+ "eval_pr_auc": 0.6972816477778223,
+ "eval_precision": 0.71053506148222,
+ "eval_precision_macro": 0.8263642352251727,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184403098150286,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.723,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10582
+ },
+ {
+ "epoch": 408.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.933494973271346,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6995090016366612,
+ "eval_f1_macro": 0.8221255355501671,
+ "eval_loss": 0.2681480348110199,
+ "eval_pr_auc": 0.697380910091478,
+ "eval_precision": 0.7102027251578598,
+ "eval_precision_macro": 0.8261680532566416,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182488910255401,
+ "eval_runtime": 0.2536,
+ "eval_samples_per_second": 642.704,
+ "eval_steps_per_second": 3.943,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10608
+ },
+ {
+ "epoch": 409.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334896009319218,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680969536304474,
+ "eval_pr_auc": 0.6974108729960042,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2261,
+ "eval_samples_per_second": 721.003,
+ "eval_steps_per_second": 4.423,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10634
+ },
+ {
+ "epoch": 410.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9334926569365942,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699607329842932,
+ "eval_f1_macro": 0.8221730364601127,
+ "eval_loss": 0.26808932423591614,
+ "eval_pr_auc": 0.6974463532877748,
+ "eval_precision": 0.7100631019594819,
+ "eval_precision_macro": 0.8261247850555049,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8183799488564967,
+ "eval_runtime": 0.2604,
+ "eval_samples_per_second": 625.923,
+ "eval_steps_per_second": 3.84,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10660
+ },
+ {
+ "epoch": 411.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9334912943867403,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2680690288543701,
+ "eval_pr_auc": 0.6974656777279113,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.181,
+ "eval_samples_per_second": 900.801,
+ "eval_steps_per_second": 5.526,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10686
+ },
+ {
+ "epoch": 412.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9334969197711376,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26803234219551086,
+ "eval_pr_auc": 0.6975509824558107,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1785,
+ "eval_samples_per_second": 913.403,
+ "eval_steps_per_second": 5.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10712
+ },
+ {
+ "epoch": 413.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9334973090710958,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7000490918016691,
+ "eval_f1_macro": 0.8224398957879677,
+ "eval_loss": 0.26804181933403015,
+ "eval_pr_auc": 0.697539016898834,
+ "eval_precision": 0.7106312292358804,
+ "eval_precision_macro": 0.8264405996101362,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8186015481252511,
+ "eval_runtime": 0.1858,
+ "eval_samples_per_second": 877.163,
+ "eval_steps_per_second": 5.381,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10738
+ },
+ {
+ "epoch": 414.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335163069090602,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998363338788871,
+ "eval_f1_macro": 0.822319298583337,
+ "eval_loss": 0.2680352032184601,
+ "eval_pr_auc": 0.697653341121327,
+ "eval_precision": 0.71053506148222,
+ "eval_precision_macro": 0.8263642352251727,
+ "eval_pred_class_0": 16659,
+ "eval_pred_class_1": 3009,
+ "eval_predicted_binding_ratio": 0.15298962782184258,
+ "eval_recall": 0.689455014511448,
+ "eval_recall_macro": 0.8184403098150286,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 682.004,
+ "eval_steps_per_second": 4.184,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10764
+ },
+ {
+ "epoch": 415.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335094357647963,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.2680439054965973,
+ "eval_pr_auc": 0.6975754183405896,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2547,
+ "eval_samples_per_second": 640.063,
+ "eval_steps_per_second": 3.927,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10790
+ },
+ {
+ "epoch": 416.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335170855089767,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.26802244782447815,
+ "eval_pr_auc": 0.6976835126920541,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.1858,
+ "eval_samples_per_second": 877.122,
+ "eval_steps_per_second": 5.381,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10816
+ },
+ {
+ "epoch": 417.0,
+ "eval_accuracy": 0.9069046166361603,
+ "eval_auc": 0.9335191877287514,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004743988221822,
+ "eval_f1_macro": 0.8226809883524872,
+ "eval_loss": 0.26805901527404785,
+ "eval_pr_auc": 0.6976076287719296,
+ "eval_precision": 0.7108233731739708,
+ "eval_precision_macro": 0.8265932427829508,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8189240247456961,
+ "eval_runtime": 0.2586,
+ "eval_samples_per_second": 630.253,
+ "eval_steps_per_second": 3.867,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10842
+ },
+ {
+ "epoch": 418.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335267206829443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002617801047121,
+ "eval_f1_macro": 0.8225604590386311,
+ "eval_loss": 0.26806166768074036,
+ "eval_pr_auc": 0.6976721144908643,
+ "eval_precision": 0.7107273331119229,
+ "eval_precision_macro": 0.8265169354519211,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187627864354736,
+ "eval_runtime": 0.1992,
+ "eval_samples_per_second": 818.364,
+ "eval_steps_per_second": 5.021,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10868
+ },
+ {
+ "epoch": 419.0,
+ "eval_accuracy": 0.9069554606467358,
+ "eval_auc": 0.9335306331475249,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7006869479882237,
+ "eval_f1_macro": 0.8228014837466855,
+ "eval_loss": 0.2680812180042267,
+ "eval_pr_auc": 0.6976481882085733,
+ "eval_precision": 0.7109193494855626,
+ "eval_precision_macro": 0.8266695216356063,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190852630559187,
+ "eval_runtime": 0.2498,
+ "eval_samples_per_second": 652.627,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10894
+ },
+ {
+ "epoch": 420.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335429739562026,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26813384890556335,
+ "eval_pr_auc": 0.6976080200264206,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2232,
+ "eval_samples_per_second": 730.143,
+ "eval_steps_per_second": 4.479,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10920
+ },
+ {
+ "epoch": 421.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9335431686061818,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.2681698799133301,
+ "eval_pr_auc": 0.6975308484626278,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.1913,
+ "eval_samples_per_second": 852.153,
+ "eval_steps_per_second": 5.228,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10946
+ },
+ {
+ "epoch": 422.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335561517597906,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681558430194855,
+ "eval_pr_auc": 0.6975926126749412,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.252,
+ "eval_samples_per_second": 646.818,
+ "eval_steps_per_second": 3.968,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10972
+ },
+ {
+ "epoch": 423.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9335701957557857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.2681851089000702,
+ "eval_pr_auc": 0.6976584248764129,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.2518,
+ "eval_samples_per_second": 647.463,
+ "eval_steps_per_second": 3.972,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 10998
+ },
+ {
+ "epoch": 423.0769230769231,
+ "grad_norm": 19880.513671875,
+ "learning_rate": 7.045132214180816e-08,
+ "loss": 0.198,
+ "step": 11000
+ },
+ {
+ "epoch": 424.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933567850223537,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.2681238353252411,
+ "eval_pr_auc": 0.6977041251052366,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.1802,
+ "eval_samples_per_second": 904.736,
+ "eval_steps_per_second": 5.551,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11024
+ },
+ {
+ "epoch": 425.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9335591688344673,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26810285449028015,
+ "eval_pr_auc": 0.6976771787125668,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2422,
+ "eval_samples_per_second": 673.038,
+ "eval_steps_per_second": 4.129,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11050
+ },
+ {
+ "epoch": 426.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335456990559106,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700228832951945,
+ "eval_f1_macro": 0.8225089013937882,
+ "eval_loss": 0.2680869400501251,
+ "eval_pr_auc": 0.6976467269760291,
+ "eval_precision": 0.7099767981438515,
+ "eval_precision_macro": 0.8261913298268353,
+ "eval_pred_class_0": 16651,
+ "eval_pred_class_1": 3017,
+ "eval_predicted_binding_ratio": 0.15339637990644703,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189645411388546,
+ "eval_runtime": 0.1727,
+ "eval_samples_per_second": 943.897,
+ "eval_steps_per_second": 5.791,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11076
+ },
+ {
+ "epoch": 427.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335531541501119,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004412485700278,
+ "eval_f1_macro": 0.8226293306702985,
+ "eval_loss": 0.26812946796417236,
+ "eval_pr_auc": 0.6976292182478663,
+ "eval_precision": 0.7100728959575878,
+ "eval_precision_macro": 0.8262676792100252,
+ "eval_pred_class_0": 16650,
+ "eval_pred_class_1": 3018,
+ "eval_predicted_binding_ratio": 0.15344722391702256,
+ "eval_recall": 0.691067397613673,
+ "eval_recall_macro": 0.8191257794490772,
+ "eval_runtime": 0.2563,
+ "eval_samples_per_second": 635.952,
+ "eval_steps_per_second": 3.902,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11102
+ },
+ {
+ "epoch": 428.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335772907475254,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26813971996307373,
+ "eval_pr_auc": 0.6977601654966087,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.2428,
+ "eval_samples_per_second": 671.434,
+ "eval_steps_per_second": 4.119,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11128
+ },
+ {
+ "epoch": 429.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.9335880938213678,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.26816368103027344,
+ "eval_pr_auc": 0.6977910817778257,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.1821,
+ "eval_samples_per_second": 895.286,
+ "eval_steps_per_second": 5.493,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11154
+ },
+ {
+ "epoch": 430.0,
+ "eval_accuracy": 0.906599552572707,
+ "eval_auc": 0.9335897288811925,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6993945344460808,
+ "eval_f1_macro": 0.8220524214743572,
+ "eval_loss": 0.2681582272052765,
+ "eval_pr_auc": 0.6978031299323104,
+ "eval_precision": 0.7099667774086379,
+ "eval_precision_macro": 0.8260483424802825,
+ "eval_pred_class_0": 16658,
+ "eval_pred_class_1": 3010,
+ "eval_predicted_binding_ratio": 0.15304047183241815,
+ "eval_recall": 0.6891325378910029,
+ "eval_recall_macro": 0.8182187105462742,
+ "eval_runtime": 0.259,
+ "eval_samples_per_second": 629.36,
+ "eval_steps_per_second": 3.861,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11180
+ },
+ {
+ "epoch": 431.0,
+ "eval_accuracy": 0.9065487085621314,
+ "eval_auc": 0.9336081038392237,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6990831696136215,
+ "eval_f1_macro": 0.8218841874311036,
+ "eval_loss": 0.2681788206100464,
+ "eval_pr_auc": 0.6978648412555615,
+ "eval_precision": 0.7100099767209843,
+ "eval_precision_macro": 0.8260151318092648,
+ "eval_pred_class_0": 16661,
+ "eval_pred_class_1": 3007,
+ "eval_predicted_binding_ratio": 0.15288793980069149,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8179264144050951,
+ "eval_runtime": 0.1932,
+ "eval_samples_per_second": 843.696,
+ "eval_steps_per_second": 5.176,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11206
+ },
+ {
+ "epoch": 432.0,
+ "eval_accuracy": 0.9064978645515558,
+ "eval_auc": 0.933607091659332,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6989687346537895,
+ "eval_f1_macro": 0.821811089570853,
+ "eval_loss": 0.26817184686660767,
+ "eval_pr_auc": 0.6978628373729787,
+ "eval_precision": 0.7097739361702128,
+ "eval_precision_macro": 0.825895371446451,
+ "eval_pred_class_0": 16660,
+ "eval_pred_class_1": 3008,
+ "eval_predicted_binding_ratio": 0.15293878381126702,
+ "eval_recall": 0.6884875846501128,
+ "eval_recall_macro": 0.8178962339258291,
+ "eval_runtime": 0.2181,
+ "eval_samples_per_second": 747.26,
+ "eval_steps_per_second": 4.584,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11232
+ },
+ {
+ "epoch": 433.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336009699174881,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26814383268356323,
+ "eval_pr_auc": 0.6978726222778764,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.2581,
+ "eval_samples_per_second": 631.607,
+ "eval_steps_per_second": 3.875,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11258
+ },
+ {
+ "epoch": 434.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9335997338901205,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7002452984464431,
+ "eval_f1_macro": 0.8225346777593885,
+ "eval_loss": 0.26813551783561707,
+ "eval_pr_auc": 0.6978611289947059,
+ "eval_precision": 0.7103516921035169,
+ "eval_precision_macro": 0.8263539414042262,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188636637871641,
+ "eval_runtime": 0.1828,
+ "eval_samples_per_second": 891.596,
+ "eval_steps_per_second": 5.47,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11284
+ },
+ {
+ "epoch": 435.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9335949455006336,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001308044473512,
+ "eval_f1_macro": 0.8224615491231337,
+ "eval_loss": 0.2681216299533844,
+ "eval_pr_auc": 0.6978496657032133,
+ "eval_precision": 0.7101160862354893,
+ "eval_precision_macro": 0.8262344077367322,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188334833078981,
+ "eval_runtime": 0.2595,
+ "eval_samples_per_second": 628.033,
+ "eval_steps_per_second": 3.853,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11310
+ },
+ {
+ "epoch": 436.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933600045330087,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001308044473512,
+ "eval_f1_macro": 0.8224615491231337,
+ "eval_loss": 0.2681162655353546,
+ "eval_pr_auc": 0.6978775373770459,
+ "eval_precision": 0.7101160862354893,
+ "eval_precision_macro": 0.8262344077367322,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188334833078981,
+ "eval_runtime": 0.2578,
+ "eval_samples_per_second": 632.163,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11336
+ },
+ {
+ "epoch": 437.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336053398095198,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.26813197135925293,
+ "eval_pr_auc": 0.6979041519553001,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.1888,
+ "eval_samples_per_second": 863.236,
+ "eval_steps_per_second": 5.296,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11362
+ },
+ {
+ "epoch": 438.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933606196269428,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26813068985939026,
+ "eval_pr_auc": 0.6979019651404079,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2467,
+ "eval_samples_per_second": 660.854,
+ "eval_steps_per_second": 4.054,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11388
+ },
+ {
+ "epoch": 439.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336083958141924,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26813721656799316,
+ "eval_pr_auc": 0.6979009956604231,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1795,
+ "eval_samples_per_second": 908.065,
+ "eval_steps_per_second": 5.571,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11414
+ },
+ {
+ "epoch": 440.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336056512494865,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681269645690918,
+ "eval_pr_auc": 0.697889844022227,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2577,
+ "eval_samples_per_second": 632.569,
+ "eval_steps_per_second": 3.881,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11440
+ },
+ {
+ "epoch": 441.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336110917164033,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681290805339813,
+ "eval_pr_auc": 0.6979179015521837,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2643,
+ "eval_samples_per_second": 616.824,
+ "eval_steps_per_second": 3.784,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11466
+ },
+ {
+ "epoch": 442.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336152864234539,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681342363357544,
+ "eval_pr_auc": 0.697937589363129,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1746,
+ "eval_samples_per_second": 933.511,
+ "eval_steps_per_second": 5.727,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11492
+ },
+ {
+ "epoch": 442.3076923076923,
+ "grad_norm": 19259.90625,
+ "learning_rate": 4.0062918659231006e-08,
+ "loss": 0.1984,
+ "step": 11500
+ },
+ {
+ "epoch": 443.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336222548927073,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.26813840866088867,
+ "eval_pr_auc": 0.6979755820701472,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.189,
+ "eval_samples_per_second": 862.533,
+ "eval_steps_per_second": 5.292,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11518
+ },
+ {
+ "epoch": 444.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336241624625029,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6999182338511856,
+ "eval_f1_macro": 0.8223410440199006,
+ "eval_loss": 0.2681417763233185,
+ "eval_pr_auc": 0.6980183846984902,
+ "eval_precision": 0.710019907100199,
+ "eval_precision_macro": 0.8261580260852261,
+ "eval_pred_class_0": 16654,
+ "eval_pred_class_1": 3014,
+ "eval_predicted_binding_ratio": 0.15324384787472037,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8186722449976755,
+ "eval_runtime": 0.1783,
+ "eval_samples_per_second": 913.982,
+ "eval_steps_per_second": 5.607,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11544
+ },
+ {
+ "epoch": 445.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933623228142603,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26812103390693665,
+ "eval_pr_auc": 0.6980203083536239,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.813,
+ "eval_steps_per_second": 4.06,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11570
+ },
+ {
+ "epoch": 446.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.933622994562628,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.26812058687210083,
+ "eval_pr_auc": 0.6980222288630917,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.25,
+ "eval_samples_per_second": 651.994,
+ "eval_steps_per_second": 4.0,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11596
+ },
+ {
+ "epoch": 447.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336267026447306,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681138813495636,
+ "eval_pr_auc": 0.6980542679927909,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1865,
+ "eval_samples_per_second": 873.899,
+ "eval_steps_per_second": 5.361,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11622
+ },
+ {
+ "epoch": 448.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336341674714307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26811888813972473,
+ "eval_pr_auc": 0.6980920829430033,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2363,
+ "eval_samples_per_second": 689.772,
+ "eval_steps_per_second": 4.232,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11648
+ },
+ {
+ "epoch": 449.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336351017913307,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7001472272206772,
+ "eval_f1_macro": 0.8224873029219602,
+ "eval_loss": 0.26812025904655457,
+ "eval_pr_auc": 0.6980982354073687,
+ "eval_precision": 0.7104913678618858,
+ "eval_precision_macro": 0.8263972209146124,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187326059562077,
+ "eval_runtime": 0.2566,
+ "eval_samples_per_second": 635.262,
+ "eval_steps_per_second": 3.897,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11674
+ },
+ {
+ "epoch": 450.0,
+ "eval_accuracy": 0.9068029286150091,
+ "eval_auc": 0.9336350044663411,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003433055419324,
+ "eval_f1_macro": 0.8225820203316995,
+ "eval_loss": 0.2681255340576172,
+ "eval_pr_auc": 0.69809285211945,
+ "eval_precision": 0.7102122015915119,
+ "eval_precision_macro": 0.8263107608966447,
+ "eval_pred_class_0": 16652,
+ "eval_pred_class_1": 3016,
+ "eval_predicted_binding_ratio": 0.15334553589587147,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8189947216181206,
+ "eval_runtime": 0.1865,
+ "eval_samples_per_second": 874.141,
+ "eval_steps_per_second": 5.363,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11700
+ },
+ {
+ "epoch": 451.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336387320134417,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2681383192539215,
+ "eval_pr_auc": 0.6981051991963814,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2522,
+ "eval_samples_per_second": 646.252,
+ "eval_steps_per_second": 3.965,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11726
+ },
+ {
+ "epoch": 452.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336409802207007,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7003598298985934,
+ "eval_f1_macro": 0.8226078241660808,
+ "eval_loss": 0.26812514662742615,
+ "eval_pr_auc": 0.6981289571890097,
+ "eval_precision": 0.7105874543644208,
+ "eval_precision_macro": 0.8264735530603251,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690422444372783,
+ "eval_recall_macro": 0.8188938442664302,
+ "eval_runtime": 0.1751,
+ "eval_samples_per_second": 930.856,
+ "eval_steps_per_second": 5.711,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11752
+ },
+ {
+ "epoch": 453.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336424400955443,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26812946796417236,
+ "eval_pr_auc": 0.6981354214954091,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2533,
+ "eval_samples_per_second": 643.502,
+ "eval_steps_per_second": 3.948,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11778
+ },
+ {
+ "epoch": 454.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336481628049311,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681373655796051,
+ "eval_pr_auc": 0.6981529233150263,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2333,
+ "eval_samples_per_second": 698.566,
+ "eval_steps_per_second": 4.286,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11804
+ },
+ {
+ "epoch": 455.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336448537552857,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26815658807754517,
+ "eval_pr_auc": 0.6981319681162118,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1829,
+ "eval_samples_per_second": 891.289,
+ "eval_steps_per_second": 5.468,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11830
+ },
+ {
+ "epoch": 456.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933646138445148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.268153578042984,
+ "eval_pr_auc": 0.6981406301220767,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2528,
+ "eval_samples_per_second": 644.693,
+ "eval_steps_per_second": 3.955,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11856
+ },
+ {
+ "epoch": 457.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.933646138445148,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.268160343170166,
+ "eval_pr_auc": 0.6981424953255787,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2524,
+ "eval_samples_per_second": 645.855,
+ "eval_steps_per_second": 3.962,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11882
+ },
+ {
+ "epoch": 458.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336505959296704,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.26815977692604065,
+ "eval_pr_auc": 0.6981608628032375,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.1785,
+ "eval_samples_per_second": 913.407,
+ "eval_steps_per_second": 5.604,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11908
+ },
+ {
+ "epoch": 459.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336521336645056,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26815950870513916,
+ "eval_pr_auc": 0.6981611753342029,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1894,
+ "eval_samples_per_second": 860.719,
+ "eval_steps_per_second": 5.28,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11934
+ },
+ {
+ "epoch": 460.0,
+ "eval_accuracy": 0.9066503965832825,
+ "eval_auc": 0.9336499146547433,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6997055937193327,
+ "eval_f1_macro": 0.8222205050048714,
+ "eval_loss": 0.2681548595428467,
+ "eval_pr_auc": 0.6981527417806164,
+ "eval_precision": 0.7099236641221374,
+ "eval_precision_macro": 0.8260816159097628,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.818511006687453,
+ "eval_runtime": 0.2094,
+ "eval_samples_per_second": 778.376,
+ "eval_steps_per_second": 4.775,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11960
+ },
+ {
+ "epoch": 461.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336533599593742,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681526839733124,
+ "eval_pr_auc": 0.6981686335311912,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2574,
+ "eval_samples_per_second": 633.254,
+ "eval_steps_per_second": 3.885,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 11986
+ },
+ {
+ "epoch": 461.53846153846155,
+ "grad_norm": 19181.365234375,
+ "learning_rate": 1.7952297882945e-08,
+ "loss": 0.1977,
+ "step": 12000
+ },
+ {
+ "epoch": 462.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336556178991323,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26815271377563477,
+ "eval_pr_auc": 0.6981807528411922,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.2521,
+ "eval_samples_per_second": 646.614,
+ "eval_steps_per_second": 3.967,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12012
+ },
+ {
+ "epoch": 463.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336526397544513,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.2681511640548706,
+ "eval_pr_auc": 0.6981676703517014,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2048,
+ "eval_samples_per_second": 795.899,
+ "eval_steps_per_second": 4.883,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12038
+ },
+ {
+ "epoch": 464.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336516859695536,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26814842224121094,
+ "eval_pr_auc": 0.698161790632896,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2527,
+ "eval_samples_per_second": 645.01,
+ "eval_steps_per_second": 3.957,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12064
+ },
+ {
+ "epoch": 465.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336534183543679,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.26814672350883484,
+ "eval_pr_auc": 0.6981725600302674,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.1868,
+ "eval_samples_per_second": 872.776,
+ "eval_steps_per_second": 5.354,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12090
+ },
+ {
+ "epoch": 466.0,
+ "eval_accuracy": 0.906701240593858,
+ "eval_auc": 0.9336528344044305,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.6998200556191723,
+ "eval_f1_macro": 0.822293617491433,
+ "eval_loss": 0.2681439220905304,
+ "eval_pr_auc": 0.6981719802867735,
+ "eval_precision": 0.7101593625498008,
+ "eval_precision_macro": 0.8262011990462741,
+ "eval_pred_class_0": 16656,
+ "eval_pred_class_1": 3012,
+ "eval_predicted_binding_ratio": 0.15314215985356924,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185411871667191,
+ "eval_runtime": 0.2538,
+ "eval_samples_per_second": 642.256,
+ "eval_steps_per_second": 3.94,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12116
+ },
+ {
+ "epoch": 467.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336571167039716,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26814955472946167,
+ "eval_pr_auc": 0.6981930050756842,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.251,
+ "eval_samples_per_second": 649.332,
+ "eval_steps_per_second": 3.984,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12142
+ },
+ {
+ "epoch": 468.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336574573414352,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.26814743876457214,
+ "eval_pr_auc": 0.6981957415820915,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.1759,
+ "eval_samples_per_second": 926.564,
+ "eval_steps_per_second": 5.684,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12168
+ },
+ {
+ "epoch": 469.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336602894986317,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.699934554973822,
+ "eval_f1_macro": 0.8223667477493719,
+ "eval_loss": 0.2681457996368408,
+ "eval_pr_auc": 0.6982076318844164,
+ "eval_precision": 0.7103952175357025,
+ "eval_precision_macro": 0.8263208602537131,
+ "eval_pred_class_0": 16657,
+ "eval_pred_class_1": 3011,
+ "eval_predicted_binding_ratio": 0.1530913158429937,
+ "eval_recall": 0.6897774911318929,
+ "eval_recall_macro": 0.8185713676459851,
+ "eval_runtime": 0.1997,
+ "eval_samples_per_second": 816.139,
+ "eval_steps_per_second": 5.007,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12194
+ },
+ {
+ "epoch": 470.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336571945639633,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681419849395752,
+ "eval_pr_auc": 0.6981934072595471,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2541,
+ "eval_samples_per_second": 641.578,
+ "eval_steps_per_second": 3.936,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12220
+ },
+ {
+ "epoch": 471.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336578855713892,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681434154510498,
+ "eval_pr_auc": 0.6981968133129176,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2585,
+ "eval_samples_per_second": 630.471,
+ "eval_steps_per_second": 3.868,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12246
+ },
+ {
+ "epoch": 472.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336576422589153,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26814115047454834,
+ "eval_pr_auc": 0.6981937092453367,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2611,
+ "eval_samples_per_second": 624.347,
+ "eval_steps_per_second": 3.83,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12272
+ },
+ {
+ "epoch": 473.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933659540096212,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26814183592796326,
+ "eval_pr_auc": 0.698207050020266,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1807,
+ "eval_samples_per_second": 902.095,
+ "eval_steps_per_second": 5.534,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12298
+ },
+ {
+ "epoch": 474.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.933661126493542,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813554763793945,
+ "eval_pr_auc": 0.6982130837277154,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2369,
+ "eval_samples_per_second": 688.198,
+ "eval_steps_per_second": 4.222,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12324
+ },
+ {
+ "epoch": 475.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606204035962,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813259720802307,
+ "eval_pr_auc": 0.6982118437878516,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.1655,
+ "eval_samples_per_second": 984.988,
+ "eval_steps_per_second": 6.043,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12350
+ },
+ {
+ "epoch": 476.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.933660883181068,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813384890556335,
+ "eval_pr_auc": 0.6982130674055568,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2472,
+ "eval_samples_per_second": 659.287,
+ "eval_steps_per_second": 4.045,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12376
+ },
+ {
+ "epoch": 477.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336603284286276,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681376338005066,
+ "eval_pr_auc": 0.6982114384730127,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.234,
+ "eval_samples_per_second": 696.525,
+ "eval_steps_per_second": 4.273,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12402
+ },
+ {
+ "epoch": 478.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606982635879,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26814013719558716,
+ "eval_pr_auc": 0.6982139708732891,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2492,
+ "eval_samples_per_second": 654.084,
+ "eval_steps_per_second": 4.013,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12428
+ },
+ {
+ "epoch": 479.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606593335921,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813676953315735,
+ "eval_pr_auc": 0.6982155837128797,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2089,
+ "eval_samples_per_second": 780.114,
+ "eval_steps_per_second": 4.786,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12454
+ },
+ {
+ "epoch": 480.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336606009385984,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813599467277527,
+ "eval_pr_auc": 0.6982237203295948,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.239,
+ "eval_samples_per_second": 682.114,
+ "eval_steps_per_second": 4.185,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12480
+ },
+ {
+ "epoch": 480.7692307692308,
+ "grad_norm": 19666.140625,
+ "learning_rate": 4.5173988392051e-09,
+ "loss": 0.1976,
+ "step": 12500
+ },
+ {
+ "epoch": 481.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336608247860743,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.26813769340515137,
+ "eval_pr_auc": 0.6982221169303999,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2452,
+ "eval_samples_per_second": 664.649,
+ "eval_steps_per_second": 4.078,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12506
+ },
+ {
+ "epoch": 482.0,
+ "eval_accuracy": 0.9068537726255848,
+ "eval_auc": 0.9336611459585399,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.7004578155657293,
+ "eval_f1_macro": 0.822655157030306,
+ "eval_loss": 0.2681364417076111,
+ "eval_pr_auc": 0.6982243970162039,
+ "eval_precision": 0.7104477611940299,
+ "eval_precision_macro": 0.8264302698361911,
+ "eval_pred_class_0": 16653,
+ "eval_pred_class_1": 3015,
+ "eval_predicted_binding_ratio": 0.1532946918852959,
+ "eval_recall": 0.690744920993228,
+ "eval_recall_macro": 0.8190249020973867,
+ "eval_runtime": 0.2509,
+ "eval_samples_per_second": 649.641,
+ "eval_steps_per_second": 3.986,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12532
+ },
+ {
+ "epoch": 483.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336621192084356,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681383192539215,
+ "eval_pr_auc": 0.6982286439182355,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.229,
+ "eval_samples_per_second": 711.694,
+ "eval_steps_per_second": 4.366,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12558
+ },
+ {
+ "epoch": 484.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336621970684273,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681386172771454,
+ "eval_pr_auc": 0.6982328773712362,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2516,
+ "eval_samples_per_second": 647.789,
+ "eval_steps_per_second": 3.974,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12584
+ },
+ {
+ "epoch": 485.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336618856284606,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.2681376338005066,
+ "eval_pr_auc": 0.6982308470568848,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.4219,
+ "eval_samples_per_second": 386.39,
+ "eval_steps_per_second": 2.37,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12610
+ },
+ {
+ "epoch": 486.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336622749284189,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813745498657227,
+ "eval_pr_auc": 0.6982319234202713,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2599,
+ "eval_samples_per_second": 627.256,
+ "eval_steps_per_second": 3.848,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12636
+ },
+ {
+ "epoch": 487.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626836933752,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813769340515137,
+ "eval_pr_auc": 0.6982353364927889,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2613,
+ "eval_samples_per_second": 623.782,
+ "eval_steps_per_second": 3.827,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12662
+ },
+ {
+ "epoch": 488.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336625766358866,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813805103302,
+ "eval_pr_auc": 0.6982371615828771,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2497,
+ "eval_samples_per_second": 652.662,
+ "eval_steps_per_second": 4.004,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12688
+ },
+ {
+ "epoch": 489.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626642283772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982364881625377,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2636,
+ "eval_samples_per_second": 618.289,
+ "eval_steps_per_second": 3.793,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12714
+ },
+ {
+ "epoch": 490.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336626642283772,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982365330396263,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2579,
+ "eval_samples_per_second": 632.058,
+ "eval_steps_per_second": 3.878,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12740
+ },
+ {
+ "epoch": 491.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933662722623371,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982369774278672,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2559,
+ "eval_samples_per_second": 636.959,
+ "eval_steps_per_second": 3.908,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12766
+ },
+ {
+ "epoch": 492.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627420883689,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982367884435748,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1891,
+ "eval_samples_per_second": 862.094,
+ "eval_steps_per_second": 5.289,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12792
+ },
+ {
+ "epoch": 493.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336628199483605,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982375574473259,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1955,
+ "eval_samples_per_second": 833.803,
+ "eval_steps_per_second": 5.115,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12818
+ },
+ {
+ "epoch": 494.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627810183648,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982374868010095,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.254,
+ "eval_samples_per_second": 641.752,
+ "eval_steps_per_second": 3.937,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12844
+ },
+ {
+ "epoch": 495.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336628296808595,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982373741196756,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2245,
+ "eval_samples_per_second": 726.009,
+ "eval_steps_per_second": 4.454,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12870
+ },
+ {
+ "epoch": 496.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627712858657,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982369260304812,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2507,
+ "eval_samples_per_second": 650.217,
+ "eval_steps_per_second": 3.989,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12896
+ },
+ {
+ "epoch": 497.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627615533667,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.698237407972925,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2463,
+ "eval_samples_per_second": 661.887,
+ "eval_steps_per_second": 4.061,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12922
+ },
+ {
+ "epoch": 498.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.933662722623371,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982366928868454,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2494,
+ "eval_samples_per_second": 653.517,
+ "eval_steps_per_second": 4.009,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12948
+ },
+ {
+ "epoch": 499.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336627712858656,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982368906715606,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.1725,
+ "eval_samples_per_second": 945.076,
+ "eval_steps_per_second": 5.798,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 12974
+ },
+ {
+ "epoch": 500.0,
+ "grad_norm": 38080.09765625,
+ "learning_rate": 1.8024699288687884e-14,
+ "loss": 0.1978,
+ "step": 13000
+ },
+ {
+ "epoch": 500.0,
+ "eval_accuracy": 0.9067520846044336,
+ "eval_auc": 0.9336628296808596,
+ "eval_binding_site_ratio": 0.1576672767947936,
+ "eval_f1": 0.700032711808963,
+ "eval_f1_macro": 0.822414164585476,
+ "eval_loss": 0.26813796162605286,
+ "eval_pr_auc": 0.6982374485795344,
+ "eval_precision": 0.7102555592432791,
+ "eval_precision_macro": 0.826277584485044,
+ "eval_pred_class_0": 16655,
+ "eval_pred_class_1": 3013,
+ "eval_predicted_binding_ratio": 0.1531930038641448,
+ "eval_recall": 0.690099967752338,
+ "eval_recall_macro": 0.8187024254769416,
+ "eval_runtime": 0.2652,
+ "eval_samples_per_second": 614.65,
+ "eval_steps_per_second": 3.771,
+ "eval_total_tokens": 19668,
+ "eval_true_class_0": 16567,
+ "eval_true_class_1": 3101,
+ "step": 13000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 13000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 500,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.0635606344403768e+16,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/training_args.bin b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..43cdbe5adfb2a2b6cd48f66b6b5e6b0cc84c9bc3
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/checkpoint-13000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fce922531bcc60b40ec3cfe0214120623a297c18ab37c3a2e94007f715374c7
+size 5368
diff --git a/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772413682.amax.578393.0 b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772413682.amax.578393.0
new file mode 100644
index 0000000000000000000000000000000000000000..a778a181d44016e664db550883a9510ae556b80b
--- /dev/null
+++ b/finetune_glome_nano_site_extradata_dice_0_10_1_9_tiny/logs/events.out.tfevents.1772413682.amax.578393.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7aab41102b2a53546cd1c6d1346cd03639b21aeb356f6085991a15eecf0dfac4
+size 583846
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/config.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b4e7a54fdf8bdda8d2a7ac6356523b75cecb2eb5
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/config.json
@@ -0,0 +1,44 @@
+{
+ "architectures": [
+ "GloMeModelForMaskedLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.0,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "vocab_size": 36
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/merges.txt b/pretrain_glome_nano_model_tiny/checkpoint-205000/merges.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5e7f1fd94996c8e2b65adea828af1b398eace61f
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/merges.txt
@@ -0,0 +1 @@
+#version: 0.2
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/model.safetensors b/pretrain_glome_nano_model_tiny/checkpoint-205000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b5f413b39994d9f383e707cb449461b09827efab
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e526e0b7181592a81fb1b7ba28a8fffc5ff6de77e1f50b6b1e7e912c0d0cc7a
+size 61429032
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/optimizer.pt b/pretrain_glome_nano_model_tiny/checkpoint-205000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46bf7d75a50b4bcfd7c9c41e90be697354b2dc7e
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64144aae45faeb97e156e7fe6411b0a74ba7a067c6bd585b9ac7aba62ad82e3a
+size 122968954
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/rng_state.pth b/pretrain_glome_nano_model_tiny/checkpoint-205000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c573b36e0144e3072bdb18dc2bb9ffeb5a9dd7dd
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e927d415513ff83ac7aadcb06dbe2ca6fbfb935313cba0db936929660fa0c453
+size 14244
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/scaler.pt b/pretrain_glome_nano_model_tiny/checkpoint-205000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/scheduler.pt b/pretrain_glome_nano_model_tiny/checkpoint-205000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..49bac9878e7ca3459e2bbb6a4ff0e0d90965f8f9
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac77fb48a5120ab5d787e0d509c5ec923d039324ed23c2788fb412f6b926c15c
+size 1064
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/special_tokens_map.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..14f7c9ed7b0bde6d23ee7b6a24ac2996789d1a0b
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/special_tokens_map.json
@@ -0,0 +1,51 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ece9b8e6fa70a006c5c10c47e30c9cff4ff95f0
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer.json
@@ -0,0 +1,123 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 27,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 28,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 29,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 30,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 31,
+ "content": "",
+ "single_word": false,
+ "lstrip": true,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": null,
+ "pre_tokenizer": {
+ "type": "ByteLevel",
+ "add_prefix_space": false,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "post_processor": {
+ "type": "RobertaProcessing",
+ "sep": [
+ "",
+ 29
+ ],
+ "cls": [
+ "",
+ 28
+ ],
+ "trim_offsets": true,
+ "add_prefix_space": false
+ },
+ "decoder": {
+ "type": "ByteLevel",
+ "add_prefix_space": true,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": null,
+ "continuing_subword_prefix": "",
+ "end_of_word_suffix": "",
+ "fuse_unk": false,
+ "byte_fallback": false,
+ "ignore_merges": false,
+ "vocab": {
+ "A": 0,
+ "R": 1,
+ "N": 2,
+ "D": 3,
+ "C": 4,
+ "Q": 5,
+ "E": 6,
+ "G": 7,
+ "H": 8,
+ "I": 9,
+ "L": 10,
+ "K": 11,
+ "M": 12,
+ "F": 13,
+ "P": 14,
+ "S": 15,
+ "T": 16,
+ "W": 17,
+ "Y": 18,
+ "V": 19,
+ "X": 20,
+ "B": 21,
+ "U": 22,
+ "Z": 23,
+ "O": 24,
+ ".": 25,
+ "-": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31
+ },
+ "merges": []
+ }
+}
\ No newline at end of file
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer_config.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c77f0533c6d3bd60b0a23b8adfacc351923d671
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/tokenizer_config.json
@@ -0,0 +1,58 @@
+{
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "27": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "28": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "29": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "30": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "31": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "RobertaTokenizer",
+ "trim_offsets": true,
+ "unk_token": ""
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/trainer_state.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cb345b0ed08918599e022176ae23a35ff7788a5d
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/trainer_state.json
@@ -0,0 +1,29071 @@
+{
+ "best_global_step": null,
+ "best_metric": 0.386392205953598,
+ "best_model_checkpoint": null,
+ "epoch": 1.057676928712575,
+ "eval_steps": 5000,
+ "global_step": 205000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0002579699826128232,
+ "grad_norm": 314643.03125,
+ "learning_rate": 4.9e-07,
+ "loss": 3.7018,
+ "step": 50
+ },
+ {
+ "epoch": 0.0005159399652256464,
+ "grad_norm": 286448.65625,
+ "learning_rate": 9.9e-07,
+ "loss": 3.6146,
+ "step": 100
+ },
+ {
+ "epoch": 0.0007739099478384695,
+ "grad_norm": 214582.28125,
+ "learning_rate": 1.4900000000000001e-06,
+ "loss": 3.4562,
+ "step": 150
+ },
+ {
+ "epoch": 0.0010318799304512927,
+ "grad_norm": 137193.9375,
+ "learning_rate": 1.99e-06,
+ "loss": 3.2558,
+ "step": 200
+ },
+ {
+ "epoch": 0.0012898499130641159,
+ "grad_norm": 82222.84375,
+ "learning_rate": 2.49e-06,
+ "loss": 3.0641,
+ "step": 250
+ },
+ {
+ "epoch": 0.001547819895676939,
+ "grad_norm": 56772.109375,
+ "learning_rate": 2.99e-06,
+ "loss": 2.9123,
+ "step": 300
+ },
+ {
+ "epoch": 0.0018057898782897622,
+ "grad_norm": 46737.6796875,
+ "learning_rate": 3.49e-06,
+ "loss": 2.8131,
+ "step": 350
+ },
+ {
+ "epoch": 0.0020637598609025854,
+ "grad_norm": 38769.04296875,
+ "learning_rate": 3.99e-06,
+ "loss": 2.7526,
+ "step": 400
+ },
+ {
+ "epoch": 0.0023217298435154084,
+ "grad_norm": 34701.5546875,
+ "learning_rate": 4.49e-06,
+ "loss": 2.7178,
+ "step": 450
+ },
+ {
+ "epoch": 0.0025796998261282318,
+ "grad_norm": 30332.826171875,
+ "learning_rate": 4.9900000000000005e-06,
+ "loss": 2.6967,
+ "step": 500
+ },
+ {
+ "epoch": 0.0028376698087410547,
+ "grad_norm": 27192.7421875,
+ "learning_rate": 5.49e-06,
+ "loss": 2.6798,
+ "step": 550
+ },
+ {
+ "epoch": 0.003095639791353878,
+ "grad_norm": 36959.33984375,
+ "learning_rate": 5.99e-06,
+ "loss": 2.668,
+ "step": 600
+ },
+ {
+ "epoch": 0.003353609773966701,
+ "grad_norm": 30939.365234375,
+ "learning_rate": 6.4900000000000005e-06,
+ "loss": 2.6572,
+ "step": 650
+ },
+ {
+ "epoch": 0.0036115797565795245,
+ "grad_norm": 26976.78125,
+ "learning_rate": 6.990000000000001e-06,
+ "loss": 2.6397,
+ "step": 700
+ },
+ {
+ "epoch": 0.0038695497391923474,
+ "grad_norm": 32308.447265625,
+ "learning_rate": 7.4899999999999994e-06,
+ "loss": 2.6021,
+ "step": 750
+ },
+ {
+ "epoch": 0.004127519721805171,
+ "grad_norm": 33838.8046875,
+ "learning_rate": 7.99e-06,
+ "loss": 2.5058,
+ "step": 800
+ },
+ {
+ "epoch": 0.004385489704417994,
+ "grad_norm": 49298.84765625,
+ "learning_rate": 8.49e-06,
+ "loss": 2.4095,
+ "step": 850
+ },
+ {
+ "epoch": 0.004643459687030817,
+ "grad_norm": 36241.56640625,
+ "learning_rate": 8.99e-06,
+ "loss": 2.2961,
+ "step": 900
+ },
+ {
+ "epoch": 0.004901429669643641,
+ "grad_norm": 37884.82421875,
+ "learning_rate": 9.49e-06,
+ "loss": 2.1526,
+ "step": 950
+ },
+ {
+ "epoch": 0.0051593996522564635,
+ "grad_norm": 36827.66796875,
+ "learning_rate": 9.990000000000001e-06,
+ "loss": 2.0338,
+ "step": 1000
+ },
+ {
+ "epoch": 0.0054173696348692865,
+ "grad_norm": 41398.421875,
+ "learning_rate": 1.049e-05,
+ "loss": 1.939,
+ "step": 1050
+ },
+ {
+ "epoch": 0.0056753396174821094,
+ "grad_norm": 42648.38671875,
+ "learning_rate": 1.099e-05,
+ "loss": 1.8325,
+ "step": 1100
+ },
+ {
+ "epoch": 0.005933309600094933,
+ "grad_norm": 40172.9453125,
+ "learning_rate": 1.149e-05,
+ "loss": 1.7643,
+ "step": 1150
+ },
+ {
+ "epoch": 0.006191279582707756,
+ "grad_norm": 35860.8515625,
+ "learning_rate": 1.199e-05,
+ "loss": 1.6742,
+ "step": 1200
+ },
+ {
+ "epoch": 0.006449249565320579,
+ "grad_norm": 44456.93359375,
+ "learning_rate": 1.249e-05,
+ "loss": 1.6026,
+ "step": 1250
+ },
+ {
+ "epoch": 0.006707219547933402,
+ "grad_norm": 36839.08984375,
+ "learning_rate": 1.299e-05,
+ "loss": 1.521,
+ "step": 1300
+ },
+ {
+ "epoch": 0.006965189530546226,
+ "grad_norm": 44026.68359375,
+ "learning_rate": 1.349e-05,
+ "loss": 1.4436,
+ "step": 1350
+ },
+ {
+ "epoch": 0.007223159513159049,
+ "grad_norm": 35557.578125,
+ "learning_rate": 1.399e-05,
+ "loss": 1.3773,
+ "step": 1400
+ },
+ {
+ "epoch": 0.007481129495771872,
+ "grad_norm": 38767.60546875,
+ "learning_rate": 1.449e-05,
+ "loss": 1.3023,
+ "step": 1450
+ },
+ {
+ "epoch": 0.007739099478384695,
+ "grad_norm": 36654.796875,
+ "learning_rate": 1.499e-05,
+ "loss": 1.2627,
+ "step": 1500
+ },
+ {
+ "epoch": 0.007997069460997519,
+ "grad_norm": 41690.328125,
+ "learning_rate": 1.5490000000000002e-05,
+ "loss": 1.2063,
+ "step": 1550
+ },
+ {
+ "epoch": 0.008255039443610342,
+ "grad_norm": 38743.59375,
+ "learning_rate": 1.599e-05,
+ "loss": 1.1626,
+ "step": 1600
+ },
+ {
+ "epoch": 0.008513009426223165,
+ "grad_norm": 41839.7890625,
+ "learning_rate": 1.649e-05,
+ "loss": 1.1225,
+ "step": 1650
+ },
+ {
+ "epoch": 0.008770979408835988,
+ "grad_norm": 42897.0703125,
+ "learning_rate": 1.699e-05,
+ "loss": 1.0864,
+ "step": 1700
+ },
+ {
+ "epoch": 0.00902894939144881,
+ "grad_norm": 37412.30859375,
+ "learning_rate": 1.749e-05,
+ "loss": 1.0613,
+ "step": 1750
+ },
+ {
+ "epoch": 0.009286919374061633,
+ "grad_norm": 37235.484375,
+ "learning_rate": 1.7990000000000002e-05,
+ "loss": 1.0354,
+ "step": 1800
+ },
+ {
+ "epoch": 0.009544889356674458,
+ "grad_norm": 39117.6328125,
+ "learning_rate": 1.849e-05,
+ "loss": 1.0059,
+ "step": 1850
+ },
+ {
+ "epoch": 0.009802859339287281,
+ "grad_norm": 37297.6875,
+ "learning_rate": 1.8990000000000003e-05,
+ "loss": 0.9795,
+ "step": 1900
+ },
+ {
+ "epoch": 0.010060829321900104,
+ "grad_norm": 33772.24609375,
+ "learning_rate": 1.949e-05,
+ "loss": 0.9639,
+ "step": 1950
+ },
+ {
+ "epoch": 0.010318799304512927,
+ "grad_norm": 39775.046875,
+ "learning_rate": 1.999e-05,
+ "loss": 0.9386,
+ "step": 2000
+ },
+ {
+ "epoch": 0.01057676928712575,
+ "grad_norm": 38412.2109375,
+ "learning_rate": 2.0490000000000002e-05,
+ "loss": 0.9212,
+ "step": 2050
+ },
+ {
+ "epoch": 0.010834739269738573,
+ "grad_norm": 39548.98046875,
+ "learning_rate": 2.099e-05,
+ "loss": 0.9112,
+ "step": 2100
+ },
+ {
+ "epoch": 0.011092709252351396,
+ "grad_norm": 38127.77734375,
+ "learning_rate": 2.1490000000000003e-05,
+ "loss": 0.8866,
+ "step": 2150
+ },
+ {
+ "epoch": 0.011350679234964219,
+ "grad_norm": 39877.0390625,
+ "learning_rate": 2.199e-05,
+ "loss": 0.8806,
+ "step": 2200
+ },
+ {
+ "epoch": 0.011608649217577044,
+ "grad_norm": 34642.28515625,
+ "learning_rate": 2.249e-05,
+ "loss": 0.8645,
+ "step": 2250
+ },
+ {
+ "epoch": 0.011866619200189867,
+ "grad_norm": 38508.0078125,
+ "learning_rate": 2.2990000000000002e-05,
+ "loss": 0.8609,
+ "step": 2300
+ },
+ {
+ "epoch": 0.01212458918280269,
+ "grad_norm": 33287.765625,
+ "learning_rate": 2.349e-05,
+ "loss": 0.8443,
+ "step": 2350
+ },
+ {
+ "epoch": 0.012382559165415512,
+ "grad_norm": 35477.5546875,
+ "learning_rate": 2.3990000000000002e-05,
+ "loss": 0.839,
+ "step": 2400
+ },
+ {
+ "epoch": 0.012640529148028335,
+ "grad_norm": 32204.408203125,
+ "learning_rate": 2.449e-05,
+ "loss": 0.8204,
+ "step": 2450
+ },
+ {
+ "epoch": 0.012898499130641158,
+ "grad_norm": 35113.59765625,
+ "learning_rate": 2.4990000000000003e-05,
+ "loss": 0.8214,
+ "step": 2500
+ },
+ {
+ "epoch": 0.013156469113253981,
+ "grad_norm": 36591.2421875,
+ "learning_rate": 2.549e-05,
+ "loss": 0.8066,
+ "step": 2550
+ },
+ {
+ "epoch": 0.013414439095866804,
+ "grad_norm": 37926.3125,
+ "learning_rate": 2.5990000000000004e-05,
+ "loss": 0.7993,
+ "step": 2600
+ },
+ {
+ "epoch": 0.013672409078479627,
+ "grad_norm": 35413.01171875,
+ "learning_rate": 2.6490000000000002e-05,
+ "loss": 0.8012,
+ "step": 2650
+ },
+ {
+ "epoch": 0.013930379061092452,
+ "grad_norm": 33275.1796875,
+ "learning_rate": 2.6989999999999997e-05,
+ "loss": 0.7879,
+ "step": 2700
+ },
+ {
+ "epoch": 0.014188349043705275,
+ "grad_norm": 35463.87109375,
+ "learning_rate": 2.749e-05,
+ "loss": 0.7808,
+ "step": 2750
+ },
+ {
+ "epoch": 0.014446319026318098,
+ "grad_norm": 33143.234375,
+ "learning_rate": 2.7989999999999998e-05,
+ "loss": 0.7813,
+ "step": 2800
+ },
+ {
+ "epoch": 0.01470428900893092,
+ "grad_norm": 32908.71484375,
+ "learning_rate": 2.849e-05,
+ "loss": 0.7725,
+ "step": 2850
+ },
+ {
+ "epoch": 0.014962258991543744,
+ "grad_norm": 36443.578125,
+ "learning_rate": 2.8990000000000002e-05,
+ "loss": 0.761,
+ "step": 2900
+ },
+ {
+ "epoch": 0.015220228974156567,
+ "grad_norm": 32331.728515625,
+ "learning_rate": 2.949e-05,
+ "loss": 0.7588,
+ "step": 2950
+ },
+ {
+ "epoch": 0.01547819895676939,
+ "grad_norm": 33401.546875,
+ "learning_rate": 2.9990000000000003e-05,
+ "loss": 0.7462,
+ "step": 3000
+ },
+ {
+ "epoch": 0.015736168939382213,
+ "grad_norm": 32041.26171875,
+ "learning_rate": 3.049e-05,
+ "loss": 0.7449,
+ "step": 3050
+ },
+ {
+ "epoch": 0.015994138921995037,
+ "grad_norm": 32035.814453125,
+ "learning_rate": 3.099e-05,
+ "loss": 0.7373,
+ "step": 3100
+ },
+ {
+ "epoch": 0.01625210890460786,
+ "grad_norm": 31430.421875,
+ "learning_rate": 3.1490000000000005e-05,
+ "loss": 0.7371,
+ "step": 3150
+ },
+ {
+ "epoch": 0.016510078887220683,
+ "grad_norm": 30911.267578125,
+ "learning_rate": 3.1990000000000004e-05,
+ "loss": 0.7315,
+ "step": 3200
+ },
+ {
+ "epoch": 0.016768048869833505,
+ "grad_norm": 31906.193359375,
+ "learning_rate": 3.249e-05,
+ "loss": 0.7405,
+ "step": 3250
+ },
+ {
+ "epoch": 0.01702601885244633,
+ "grad_norm": 30320.1640625,
+ "learning_rate": 3.299e-05,
+ "loss": 0.7323,
+ "step": 3300
+ },
+ {
+ "epoch": 0.017283988835059154,
+ "grad_norm": 32357.072265625,
+ "learning_rate": 3.349e-05,
+ "loss": 0.7244,
+ "step": 3350
+ },
+ {
+ "epoch": 0.017541958817671975,
+ "grad_norm": 34023.2109375,
+ "learning_rate": 3.399e-05,
+ "loss": 0.7214,
+ "step": 3400
+ },
+ {
+ "epoch": 0.0177999288002848,
+ "grad_norm": 33940.8046875,
+ "learning_rate": 3.449e-05,
+ "loss": 0.7158,
+ "step": 3450
+ },
+ {
+ "epoch": 0.01805789878289762,
+ "grad_norm": 31701.14453125,
+ "learning_rate": 3.499e-05,
+ "loss": 0.7102,
+ "step": 3500
+ },
+ {
+ "epoch": 0.018315868765510446,
+ "grad_norm": 32291.861328125,
+ "learning_rate": 3.549e-05,
+ "loss": 0.7104,
+ "step": 3550
+ },
+ {
+ "epoch": 0.018573838748123267,
+ "grad_norm": 28074.177734375,
+ "learning_rate": 3.599e-05,
+ "loss": 0.7001,
+ "step": 3600
+ },
+ {
+ "epoch": 0.01883180873073609,
+ "grad_norm": 29823.787109375,
+ "learning_rate": 3.6490000000000005e-05,
+ "loss": 0.7029,
+ "step": 3650
+ },
+ {
+ "epoch": 0.019089778713348916,
+ "grad_norm": 29792.24609375,
+ "learning_rate": 3.699e-05,
+ "loss": 0.6949,
+ "step": 3700
+ },
+ {
+ "epoch": 0.019347748695961738,
+ "grad_norm": 31345.296875,
+ "learning_rate": 3.749e-05,
+ "loss": 0.6989,
+ "step": 3750
+ },
+ {
+ "epoch": 0.019605718678574562,
+ "grad_norm": 33923.0625,
+ "learning_rate": 3.799e-05,
+ "loss": 0.6984,
+ "step": 3800
+ },
+ {
+ "epoch": 0.019863688661187383,
+ "grad_norm": 30762.97265625,
+ "learning_rate": 3.8490000000000006e-05,
+ "loss": 0.6931,
+ "step": 3850
+ },
+ {
+ "epoch": 0.020121658643800208,
+ "grad_norm": 30794.13671875,
+ "learning_rate": 3.8990000000000004e-05,
+ "loss": 0.6923,
+ "step": 3900
+ },
+ {
+ "epoch": 0.02037962862641303,
+ "grad_norm": 29854.923828125,
+ "learning_rate": 3.9489999999999996e-05,
+ "loss": 0.6895,
+ "step": 3950
+ },
+ {
+ "epoch": 0.020637598609025854,
+ "grad_norm": 27336.958984375,
+ "learning_rate": 3.999e-05,
+ "loss": 0.6853,
+ "step": 4000
+ },
+ {
+ "epoch": 0.020895568591638675,
+ "grad_norm": 31836.81640625,
+ "learning_rate": 4.049e-05,
+ "loss": 0.6821,
+ "step": 4050
+ },
+ {
+ "epoch": 0.0211535385742515,
+ "grad_norm": 28508.548828125,
+ "learning_rate": 4.099e-05,
+ "loss": 0.6857,
+ "step": 4100
+ },
+ {
+ "epoch": 0.021411508556864325,
+ "grad_norm": 30309.2421875,
+ "learning_rate": 4.1490000000000004e-05,
+ "loss": 0.6791,
+ "step": 4150
+ },
+ {
+ "epoch": 0.021669478539477146,
+ "grad_norm": 31035.0703125,
+ "learning_rate": 4.199e-05,
+ "loss": 0.6762,
+ "step": 4200
+ },
+ {
+ "epoch": 0.02192744852208997,
+ "grad_norm": 30893.951171875,
+ "learning_rate": 4.249e-05,
+ "loss": 0.6739,
+ "step": 4250
+ },
+ {
+ "epoch": 0.022185418504702792,
+ "grad_norm": 28317.12890625,
+ "learning_rate": 4.299e-05,
+ "loss": 0.6635,
+ "step": 4300
+ },
+ {
+ "epoch": 0.022443388487315617,
+ "grad_norm": 27140.29296875,
+ "learning_rate": 4.3490000000000005e-05,
+ "loss": 0.6694,
+ "step": 4350
+ },
+ {
+ "epoch": 0.022701358469928438,
+ "grad_norm": 27948.32421875,
+ "learning_rate": 4.3990000000000004e-05,
+ "loss": 0.6667,
+ "step": 4400
+ },
+ {
+ "epoch": 0.022959328452541262,
+ "grad_norm": 27243.44140625,
+ "learning_rate": 4.449e-05,
+ "loss": 0.6689,
+ "step": 4450
+ },
+ {
+ "epoch": 0.023217298435154087,
+ "grad_norm": 29163.98828125,
+ "learning_rate": 4.499e-05,
+ "loss": 0.6639,
+ "step": 4500
+ },
+ {
+ "epoch": 0.02347526841776691,
+ "grad_norm": 27801.79296875,
+ "learning_rate": 4.549000000000001e-05,
+ "loss": 0.6612,
+ "step": 4550
+ },
+ {
+ "epoch": 0.023733238400379733,
+ "grad_norm": 28201.7265625,
+ "learning_rate": 4.599e-05,
+ "loss": 0.6608,
+ "step": 4600
+ },
+ {
+ "epoch": 0.023991208382992554,
+ "grad_norm": 28875.06640625,
+ "learning_rate": 4.649e-05,
+ "loss": 0.6642,
+ "step": 4650
+ },
+ {
+ "epoch": 0.02424917836560538,
+ "grad_norm": 25467.376953125,
+ "learning_rate": 4.699e-05,
+ "loss": 0.6513,
+ "step": 4700
+ },
+ {
+ "epoch": 0.0245071483482182,
+ "grad_norm": 27359.97265625,
+ "learning_rate": 4.749e-05,
+ "loss": 0.6554,
+ "step": 4750
+ },
+ {
+ "epoch": 0.024765118330831025,
+ "grad_norm": 30614.15234375,
+ "learning_rate": 4.799e-05,
+ "loss": 0.6574,
+ "step": 4800
+ },
+ {
+ "epoch": 0.025023088313443846,
+ "grad_norm": 29069.677734375,
+ "learning_rate": 4.8490000000000005e-05,
+ "loss": 0.6562,
+ "step": 4850
+ },
+ {
+ "epoch": 0.02528105829605667,
+ "grad_norm": 27337.37109375,
+ "learning_rate": 4.8990000000000004e-05,
+ "loss": 0.6507,
+ "step": 4900
+ },
+ {
+ "epoch": 0.025539028278669496,
+ "grad_norm": 26784.7265625,
+ "learning_rate": 4.949e-05,
+ "loss": 0.64,
+ "step": 4950
+ },
+ {
+ "epoch": 0.025796998261282317,
+ "grad_norm": 27480.509765625,
+ "learning_rate": 4.999e-05,
+ "loss": 0.6515,
+ "step": 5000
+ },
+ {
+ "epoch": 0.025796998261282317,
+ "eval_loss": 0.6312834024429321,
+ "eval_runtime": 3280.995,
+ "eval_samples_per_second": 945.177,
+ "eval_steps_per_second": 1.846,
+ "step": 5000
+ },
+ {
+ "epoch": 0.02605496824389514,
+ "grad_norm": 27871.740234375,
+ "learning_rate": 5.0490000000000006e-05,
+ "loss": 0.6424,
+ "step": 5050
+ },
+ {
+ "epoch": 0.026312938226507963,
+ "grad_norm": 31187.00390625,
+ "learning_rate": 5.0990000000000005e-05,
+ "loss": 0.643,
+ "step": 5100
+ },
+ {
+ "epoch": 0.026570908209120787,
+ "grad_norm": 25956.521484375,
+ "learning_rate": 5.149e-05,
+ "loss": 0.65,
+ "step": 5150
+ },
+ {
+ "epoch": 0.02682887819173361,
+ "grad_norm": 25967.70703125,
+ "learning_rate": 5.199000000000001e-05,
+ "loss": 0.6466,
+ "step": 5200
+ },
+ {
+ "epoch": 0.027086848174346433,
+ "grad_norm": 25310.275390625,
+ "learning_rate": 5.249000000000001e-05,
+ "loss": 0.6429,
+ "step": 5250
+ },
+ {
+ "epoch": 0.027344818156959255,
+ "grad_norm": 24740.033203125,
+ "learning_rate": 5.2990000000000006e-05,
+ "loss": 0.6415,
+ "step": 5300
+ },
+ {
+ "epoch": 0.02760278813957208,
+ "grad_norm": 30795.58984375,
+ "learning_rate": 5.3490000000000005e-05,
+ "loss": 0.6424,
+ "step": 5350
+ },
+ {
+ "epoch": 0.027860758122184904,
+ "grad_norm": 30625.59375,
+ "learning_rate": 5.399000000000001e-05,
+ "loss": 0.6361,
+ "step": 5400
+ },
+ {
+ "epoch": 0.028118728104797725,
+ "grad_norm": 27036.14453125,
+ "learning_rate": 5.449000000000001e-05,
+ "loss": 0.6351,
+ "step": 5450
+ },
+ {
+ "epoch": 0.02837669808741055,
+ "grad_norm": 26934.447265625,
+ "learning_rate": 5.499000000000001e-05,
+ "loss": 0.6304,
+ "step": 5500
+ },
+ {
+ "epoch": 0.02863466807002337,
+ "grad_norm": 25540.291015625,
+ "learning_rate": 5.549e-05,
+ "loss": 0.6304,
+ "step": 5550
+ },
+ {
+ "epoch": 0.028892638052636196,
+ "grad_norm": 26574.9375,
+ "learning_rate": 5.599e-05,
+ "loss": 0.6444,
+ "step": 5600
+ },
+ {
+ "epoch": 0.029150608035249017,
+ "grad_norm": 26941.955078125,
+ "learning_rate": 5.6489999999999996e-05,
+ "loss": 0.6373,
+ "step": 5650
+ },
+ {
+ "epoch": 0.02940857801786184,
+ "grad_norm": 26957.7734375,
+ "learning_rate": 5.699e-05,
+ "loss": 0.6363,
+ "step": 5700
+ },
+ {
+ "epoch": 0.029666548000474666,
+ "grad_norm": 24377.55859375,
+ "learning_rate": 5.749e-05,
+ "loss": 0.6213,
+ "step": 5750
+ },
+ {
+ "epoch": 0.029924517983087488,
+ "grad_norm": 25600.697265625,
+ "learning_rate": 5.799e-05,
+ "loss": 0.6362,
+ "step": 5800
+ },
+ {
+ "epoch": 0.030182487965700312,
+ "grad_norm": 23841.47265625,
+ "learning_rate": 5.849e-05,
+ "loss": 0.6274,
+ "step": 5850
+ },
+ {
+ "epoch": 0.030440457948313134,
+ "grad_norm": 23847.73046875,
+ "learning_rate": 5.899e-05,
+ "loss": 0.624,
+ "step": 5900
+ },
+ {
+ "epoch": 0.030698427930925958,
+ "grad_norm": 25549.033203125,
+ "learning_rate": 5.949e-05,
+ "loss": 0.627,
+ "step": 5950
+ },
+ {
+ "epoch": 0.03095639791353878,
+ "grad_norm": 25286.8046875,
+ "learning_rate": 5.999e-05,
+ "loss": 0.6272,
+ "step": 6000
+ },
+ {
+ "epoch": 0.031214367896151604,
+ "grad_norm": 25137.384765625,
+ "learning_rate": 6.0490000000000005e-05,
+ "loss": 0.622,
+ "step": 6050
+ },
+ {
+ "epoch": 0.031472337878764425,
+ "grad_norm": 23606.23828125,
+ "learning_rate": 6.0990000000000004e-05,
+ "loss": 0.6262,
+ "step": 6100
+ },
+ {
+ "epoch": 0.031730307861377254,
+ "grad_norm": 32101.404296875,
+ "learning_rate": 6.149000000000001e-05,
+ "loss": 0.619,
+ "step": 6150
+ },
+ {
+ "epoch": 0.031988277843990075,
+ "grad_norm": 23683.73046875,
+ "learning_rate": 6.199000000000001e-05,
+ "loss": 0.6129,
+ "step": 6200
+ },
+ {
+ "epoch": 0.032246247826602896,
+ "grad_norm": 25243.49609375,
+ "learning_rate": 6.249e-05,
+ "loss": 0.6194,
+ "step": 6250
+ },
+ {
+ "epoch": 0.03250421780921572,
+ "grad_norm": 28690.10546875,
+ "learning_rate": 6.299e-05,
+ "loss": 0.6199,
+ "step": 6300
+ },
+ {
+ "epoch": 0.032762187791828545,
+ "grad_norm": 24198.47265625,
+ "learning_rate": 6.349e-05,
+ "loss": 0.6077,
+ "step": 6350
+ },
+ {
+ "epoch": 0.03302015777444137,
+ "grad_norm": 24742.998046875,
+ "learning_rate": 6.399e-05,
+ "loss": 0.6168,
+ "step": 6400
+ },
+ {
+ "epoch": 0.03327812775705419,
+ "grad_norm": 27489.93359375,
+ "learning_rate": 6.449e-05,
+ "loss": 0.6136,
+ "step": 6450
+ },
+ {
+ "epoch": 0.03353609773966701,
+ "grad_norm": 28733.7265625,
+ "learning_rate": 6.499000000000001e-05,
+ "loss": 0.6184,
+ "step": 6500
+ },
+ {
+ "epoch": 0.03379406772227984,
+ "grad_norm": 23810.544921875,
+ "learning_rate": 6.549000000000001e-05,
+ "loss": 0.6167,
+ "step": 6550
+ },
+ {
+ "epoch": 0.03405203770489266,
+ "grad_norm": 25503.98828125,
+ "learning_rate": 6.599000000000001e-05,
+ "loss": 0.6184,
+ "step": 6600
+ },
+ {
+ "epoch": 0.03431000768750548,
+ "grad_norm": 24550.26171875,
+ "learning_rate": 6.649000000000001e-05,
+ "loss": 0.6146,
+ "step": 6650
+ },
+ {
+ "epoch": 0.03456797767011831,
+ "grad_norm": 22774.71875,
+ "learning_rate": 6.699000000000001e-05,
+ "loss": 0.6132,
+ "step": 6700
+ },
+ {
+ "epoch": 0.03482594765273113,
+ "grad_norm": 23878.90625,
+ "learning_rate": 6.749e-05,
+ "loss": 0.6127,
+ "step": 6750
+ },
+ {
+ "epoch": 0.03508391763534395,
+ "grad_norm": 28744.9921875,
+ "learning_rate": 6.799e-05,
+ "loss": 0.6203,
+ "step": 6800
+ },
+ {
+ "epoch": 0.03534188761795677,
+ "grad_norm": 24239.826171875,
+ "learning_rate": 6.849e-05,
+ "loss": 0.6069,
+ "step": 6850
+ },
+ {
+ "epoch": 0.0355998576005696,
+ "grad_norm": 27030.513671875,
+ "learning_rate": 6.899e-05,
+ "loss": 0.614,
+ "step": 6900
+ },
+ {
+ "epoch": 0.03585782758318242,
+ "grad_norm": 22872.59375,
+ "learning_rate": 6.949e-05,
+ "loss": 0.6068,
+ "step": 6950
+ },
+ {
+ "epoch": 0.03611579756579524,
+ "grad_norm": 23280.333984375,
+ "learning_rate": 6.999e-05,
+ "loss": 0.6064,
+ "step": 7000
+ },
+ {
+ "epoch": 0.03637376754840807,
+ "grad_norm": 24819.060546875,
+ "learning_rate": 7.049e-05,
+ "loss": 0.606,
+ "step": 7050
+ },
+ {
+ "epoch": 0.03663173753102089,
+ "grad_norm": 23739.595703125,
+ "learning_rate": 7.099e-05,
+ "loss": 0.6065,
+ "step": 7100
+ },
+ {
+ "epoch": 0.03688970751363371,
+ "grad_norm": 24261.28515625,
+ "learning_rate": 7.149e-05,
+ "loss": 0.6037,
+ "step": 7150
+ },
+ {
+ "epoch": 0.037147677496246534,
+ "grad_norm": 24133.744140625,
+ "learning_rate": 7.199000000000001e-05,
+ "loss": 0.6097,
+ "step": 7200
+ },
+ {
+ "epoch": 0.03740564747885936,
+ "grad_norm": 22903.197265625,
+ "learning_rate": 7.249e-05,
+ "loss": 0.6048,
+ "step": 7250
+ },
+ {
+ "epoch": 0.03766361746147218,
+ "grad_norm": 23503.970703125,
+ "learning_rate": 7.299e-05,
+ "loss": 0.6039,
+ "step": 7300
+ },
+ {
+ "epoch": 0.037921587444085005,
+ "grad_norm": 20935.388671875,
+ "learning_rate": 7.349e-05,
+ "loss": 0.6016,
+ "step": 7350
+ },
+ {
+ "epoch": 0.03817955742669783,
+ "grad_norm": 22991.720703125,
+ "learning_rate": 7.399e-05,
+ "loss": 0.6111,
+ "step": 7400
+ },
+ {
+ "epoch": 0.038437527409310654,
+ "grad_norm": 21915.90234375,
+ "learning_rate": 7.449e-05,
+ "loss": 0.5969,
+ "step": 7450
+ },
+ {
+ "epoch": 0.038695497391923475,
+ "grad_norm": 22474.25390625,
+ "learning_rate": 7.499e-05,
+ "loss": 0.6068,
+ "step": 7500
+ },
+ {
+ "epoch": 0.038953467374536296,
+ "grad_norm": 24122.150390625,
+ "learning_rate": 7.549000000000001e-05,
+ "loss": 0.6037,
+ "step": 7550
+ },
+ {
+ "epoch": 0.039211437357149125,
+ "grad_norm": 22262.220703125,
+ "learning_rate": 7.599000000000001e-05,
+ "loss": 0.5946,
+ "step": 7600
+ },
+ {
+ "epoch": 0.039469407339761946,
+ "grad_norm": 23959.7265625,
+ "learning_rate": 7.649000000000001e-05,
+ "loss": 0.598,
+ "step": 7650
+ },
+ {
+ "epoch": 0.03972737732237477,
+ "grad_norm": 21918.5859375,
+ "learning_rate": 7.699e-05,
+ "loss": 0.5959,
+ "step": 7700
+ },
+ {
+ "epoch": 0.03998534730498759,
+ "grad_norm": 23740.5390625,
+ "learning_rate": 7.749e-05,
+ "loss": 0.594,
+ "step": 7750
+ },
+ {
+ "epoch": 0.040243317287600416,
+ "grad_norm": 23406.4296875,
+ "learning_rate": 7.799e-05,
+ "loss": 0.6048,
+ "step": 7800
+ },
+ {
+ "epoch": 0.04050128727021324,
+ "grad_norm": 23423.201171875,
+ "learning_rate": 7.849e-05,
+ "loss": 0.5944,
+ "step": 7850
+ },
+ {
+ "epoch": 0.04075925725282606,
+ "grad_norm": 23187.76171875,
+ "learning_rate": 7.899000000000001e-05,
+ "loss": 0.5944,
+ "step": 7900
+ },
+ {
+ "epoch": 0.04101722723543889,
+ "grad_norm": 25532.4375,
+ "learning_rate": 7.949000000000001e-05,
+ "loss": 0.5978,
+ "step": 7950
+ },
+ {
+ "epoch": 0.04127519721805171,
+ "grad_norm": 23045.28515625,
+ "learning_rate": 7.999000000000001e-05,
+ "loss": 0.5968,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04153316720066453,
+ "grad_norm": 22853.826171875,
+ "learning_rate": 8.049e-05,
+ "loss": 0.5915,
+ "step": 8050
+ },
+ {
+ "epoch": 0.04179113718327735,
+ "grad_norm": 21853.658203125,
+ "learning_rate": 8.099e-05,
+ "loss": 0.5932,
+ "step": 8100
+ },
+ {
+ "epoch": 0.04204910716589018,
+ "grad_norm": 22395.74609375,
+ "learning_rate": 8.149e-05,
+ "loss": 0.5925,
+ "step": 8150
+ },
+ {
+ "epoch": 0.042307077148503,
+ "grad_norm": 23933.40625,
+ "learning_rate": 8.199e-05,
+ "loss": 0.5878,
+ "step": 8200
+ },
+ {
+ "epoch": 0.04256504713111582,
+ "grad_norm": 21773.087890625,
+ "learning_rate": 8.249e-05,
+ "loss": 0.5916,
+ "step": 8250
+ },
+ {
+ "epoch": 0.04282301711372865,
+ "grad_norm": 22665.11328125,
+ "learning_rate": 8.299e-05,
+ "loss": 0.5906,
+ "step": 8300
+ },
+ {
+ "epoch": 0.04308098709634147,
+ "grad_norm": 22157.091796875,
+ "learning_rate": 8.349e-05,
+ "loss": 0.5873,
+ "step": 8350
+ },
+ {
+ "epoch": 0.04333895707895429,
+ "grad_norm": 21506.8125,
+ "learning_rate": 8.399e-05,
+ "loss": 0.5927,
+ "step": 8400
+ },
+ {
+ "epoch": 0.04359692706156711,
+ "grad_norm": 22143.341796875,
+ "learning_rate": 8.449e-05,
+ "loss": 0.5828,
+ "step": 8450
+ },
+ {
+ "epoch": 0.04385489704417994,
+ "grad_norm": 23341.23828125,
+ "learning_rate": 8.499e-05,
+ "loss": 0.5885,
+ "step": 8500
+ },
+ {
+ "epoch": 0.04411286702679276,
+ "grad_norm": 21876.96484375,
+ "learning_rate": 8.549000000000001e-05,
+ "loss": 0.5913,
+ "step": 8550
+ },
+ {
+ "epoch": 0.044370837009405584,
+ "grad_norm": 22307.29296875,
+ "learning_rate": 8.599000000000001e-05,
+ "loss": 0.583,
+ "step": 8600
+ },
+ {
+ "epoch": 0.04462880699201841,
+ "grad_norm": 22859.017578125,
+ "learning_rate": 8.649000000000001e-05,
+ "loss": 0.5889,
+ "step": 8650
+ },
+ {
+ "epoch": 0.04488677697463123,
+ "grad_norm": 22058.24609375,
+ "learning_rate": 8.699e-05,
+ "loss": 0.5848,
+ "step": 8700
+ },
+ {
+ "epoch": 0.045144746957244054,
+ "grad_norm": 22116.837890625,
+ "learning_rate": 8.749e-05,
+ "loss": 0.5858,
+ "step": 8750
+ },
+ {
+ "epoch": 0.045402716939856876,
+ "grad_norm": 23110.17578125,
+ "learning_rate": 8.799e-05,
+ "loss": 0.5855,
+ "step": 8800
+ },
+ {
+ "epoch": 0.045660686922469704,
+ "grad_norm": 24173.064453125,
+ "learning_rate": 8.849e-05,
+ "loss": 0.5878,
+ "step": 8850
+ },
+ {
+ "epoch": 0.045918656905082525,
+ "grad_norm": 21521.48046875,
+ "learning_rate": 8.899e-05,
+ "loss": 0.5914,
+ "step": 8900
+ },
+ {
+ "epoch": 0.046176626887695346,
+ "grad_norm": 24516.0,
+ "learning_rate": 8.949000000000001e-05,
+ "loss": 0.5849,
+ "step": 8950
+ },
+ {
+ "epoch": 0.046434596870308174,
+ "grad_norm": 22074.9609375,
+ "learning_rate": 8.999000000000001e-05,
+ "loss": 0.5848,
+ "step": 9000
+ },
+ {
+ "epoch": 0.046692566852920996,
+ "grad_norm": 21495.4140625,
+ "learning_rate": 9.049000000000001e-05,
+ "loss": 0.579,
+ "step": 9050
+ },
+ {
+ "epoch": 0.04695053683553382,
+ "grad_norm": 23548.224609375,
+ "learning_rate": 9.099000000000001e-05,
+ "loss": 0.5826,
+ "step": 9100
+ },
+ {
+ "epoch": 0.04720850681814664,
+ "grad_norm": 22144.51953125,
+ "learning_rate": 9.149e-05,
+ "loss": 0.5879,
+ "step": 9150
+ },
+ {
+ "epoch": 0.047466476800759466,
+ "grad_norm": 20656.185546875,
+ "learning_rate": 9.199e-05,
+ "loss": 0.5806,
+ "step": 9200
+ },
+ {
+ "epoch": 0.04772444678337229,
+ "grad_norm": 21228.814453125,
+ "learning_rate": 9.249e-05,
+ "loss": 0.5858,
+ "step": 9250
+ },
+ {
+ "epoch": 0.04798241676598511,
+ "grad_norm": 20801.869140625,
+ "learning_rate": 9.299e-05,
+ "loss": 0.5816,
+ "step": 9300
+ },
+ {
+ "epoch": 0.04824038674859793,
+ "grad_norm": 24044.283203125,
+ "learning_rate": 9.349e-05,
+ "loss": 0.5811,
+ "step": 9350
+ },
+ {
+ "epoch": 0.04849835673121076,
+ "grad_norm": 22395.47265625,
+ "learning_rate": 9.399e-05,
+ "loss": 0.5782,
+ "step": 9400
+ },
+ {
+ "epoch": 0.04875632671382358,
+ "grad_norm": 22353.078125,
+ "learning_rate": 9.449e-05,
+ "loss": 0.5758,
+ "step": 9450
+ },
+ {
+ "epoch": 0.0490142966964364,
+ "grad_norm": 22520.72265625,
+ "learning_rate": 9.499e-05,
+ "loss": 0.5752,
+ "step": 9500
+ },
+ {
+ "epoch": 0.04927226667904923,
+ "grad_norm": 22016.951171875,
+ "learning_rate": 9.549e-05,
+ "loss": 0.5764,
+ "step": 9550
+ },
+ {
+ "epoch": 0.04953023666166205,
+ "grad_norm": 20046.615234375,
+ "learning_rate": 9.599000000000001e-05,
+ "loss": 0.5759,
+ "step": 9600
+ },
+ {
+ "epoch": 0.04978820664427487,
+ "grad_norm": 21346.029296875,
+ "learning_rate": 9.649e-05,
+ "loss": 0.5798,
+ "step": 9650
+ },
+ {
+ "epoch": 0.05004617662688769,
+ "grad_norm": 22449.796875,
+ "learning_rate": 9.699e-05,
+ "loss": 0.5829,
+ "step": 9700
+ },
+ {
+ "epoch": 0.05030414660950052,
+ "grad_norm": 20538.751953125,
+ "learning_rate": 9.749e-05,
+ "loss": 0.5809,
+ "step": 9750
+ },
+ {
+ "epoch": 0.05056211659211334,
+ "grad_norm": 21123.19921875,
+ "learning_rate": 9.799e-05,
+ "loss": 0.5726,
+ "step": 9800
+ },
+ {
+ "epoch": 0.05082008657472616,
+ "grad_norm": 20853.08203125,
+ "learning_rate": 9.849e-05,
+ "loss": 0.5726,
+ "step": 9850
+ },
+ {
+ "epoch": 0.05107805655733899,
+ "grad_norm": 22160.841796875,
+ "learning_rate": 9.899e-05,
+ "loss": 0.5783,
+ "step": 9900
+ },
+ {
+ "epoch": 0.05133602653995181,
+ "grad_norm": 19711.109375,
+ "learning_rate": 9.949000000000001e-05,
+ "loss": 0.5722,
+ "step": 9950
+ },
+ {
+ "epoch": 0.051593996522564634,
+ "grad_norm": 21442.310546875,
+ "learning_rate": 9.999000000000001e-05,
+ "loss": 0.5773,
+ "step": 10000
+ },
+ {
+ "epoch": 0.051593996522564634,
+ "eval_loss": 0.5661358833312988,
+ "eval_runtime": 3272.6524,
+ "eval_samples_per_second": 947.586,
+ "eval_steps_per_second": 1.851,
+ "step": 10000
+ },
+ {
+ "epoch": 0.051851966505177455,
+ "grad_norm": 21442.943359375,
+ "learning_rate": 9.999998718392692e-05,
+ "loss": 0.5727,
+ "step": 10050
+ },
+ {
+ "epoch": 0.05210993648779028,
+ "grad_norm": 21711.177734375,
+ "learning_rate": 9.999994768416664e-05,
+ "loss": 0.5707,
+ "step": 10100
+ },
+ {
+ "epoch": 0.052367906470403104,
+ "grad_norm": 21793.666015625,
+ "learning_rate": 9.999988149540251e-05,
+ "loss": 0.5727,
+ "step": 10150
+ },
+ {
+ "epoch": 0.052625876453015925,
+ "grad_norm": 18847.970703125,
+ "learning_rate": 9.999978861766983e-05,
+ "loss": 0.5726,
+ "step": 10200
+ },
+ {
+ "epoch": 0.052883846435628754,
+ "grad_norm": 22870.91796875,
+ "learning_rate": 9.999966905101816e-05,
+ "loss": 0.5751,
+ "step": 10250
+ },
+ {
+ "epoch": 0.053141816418241575,
+ "grad_norm": 23970.431640625,
+ "learning_rate": 9.999952279551135e-05,
+ "loss": 0.5745,
+ "step": 10300
+ },
+ {
+ "epoch": 0.053399786400854396,
+ "grad_norm": 19482.65625,
+ "learning_rate": 9.999934985122746e-05,
+ "loss": 0.5734,
+ "step": 10350
+ },
+ {
+ "epoch": 0.05365775638346722,
+ "grad_norm": 19720.65625,
+ "learning_rate": 9.999915021825879e-05,
+ "loss": 0.5697,
+ "step": 10400
+ },
+ {
+ "epoch": 0.053915726366080045,
+ "grad_norm": 21484.8203125,
+ "learning_rate": 9.99989238967119e-05,
+ "loss": 0.5678,
+ "step": 10450
+ },
+ {
+ "epoch": 0.05417369634869287,
+ "grad_norm": 20198.669921875,
+ "learning_rate": 9.999867088670762e-05,
+ "loss": 0.5731,
+ "step": 10500
+ },
+ {
+ "epoch": 0.05443166633130569,
+ "grad_norm": 19887.86328125,
+ "learning_rate": 9.999839118838099e-05,
+ "loss": 0.5711,
+ "step": 10550
+ },
+ {
+ "epoch": 0.05468963631391851,
+ "grad_norm": 21250.41796875,
+ "learning_rate": 9.999808480188131e-05,
+ "loss": 0.5653,
+ "step": 10600
+ },
+ {
+ "epoch": 0.05494760629653134,
+ "grad_norm": 21179.904296875,
+ "learning_rate": 9.999775172737211e-05,
+ "loss": 0.5666,
+ "step": 10650
+ },
+ {
+ "epoch": 0.05520557627914416,
+ "grad_norm": 21106.083984375,
+ "learning_rate": 9.999739196503119e-05,
+ "loss": 0.5656,
+ "step": 10700
+ },
+ {
+ "epoch": 0.05546354626175698,
+ "grad_norm": 19393.994140625,
+ "learning_rate": 9.999700551505057e-05,
+ "loss": 0.566,
+ "step": 10750
+ },
+ {
+ "epoch": 0.05572151624436981,
+ "grad_norm": 22788.060546875,
+ "learning_rate": 9.999659237763656e-05,
+ "loss": 0.5681,
+ "step": 10800
+ },
+ {
+ "epoch": 0.05597948622698263,
+ "grad_norm": 20106.75390625,
+ "learning_rate": 9.999615255300966e-05,
+ "loss": 0.5668,
+ "step": 10850
+ },
+ {
+ "epoch": 0.05623745620959545,
+ "grad_norm": 22390.466796875,
+ "learning_rate": 9.999568604140464e-05,
+ "loss": 0.5665,
+ "step": 10900
+ },
+ {
+ "epoch": 0.05649542619220827,
+ "grad_norm": 21145.044921875,
+ "learning_rate": 9.999519284307053e-05,
+ "loss": 0.5645,
+ "step": 10950
+ },
+ {
+ "epoch": 0.0567533961748211,
+ "grad_norm": 22501.64453125,
+ "learning_rate": 9.999467295827059e-05,
+ "loss": 0.5663,
+ "step": 11000
+ },
+ {
+ "epoch": 0.05701136615743392,
+ "grad_norm": 21079.431640625,
+ "learning_rate": 9.999412638728229e-05,
+ "loss": 0.5605,
+ "step": 11050
+ },
+ {
+ "epoch": 0.05726933614004674,
+ "grad_norm": 21501.4375,
+ "learning_rate": 9.999355313039742e-05,
+ "loss": 0.5643,
+ "step": 11100
+ },
+ {
+ "epoch": 0.05752730612265957,
+ "grad_norm": 22092.6328125,
+ "learning_rate": 9.999295318792194e-05,
+ "loss": 0.5602,
+ "step": 11150
+ },
+ {
+ "epoch": 0.05778527610527239,
+ "grad_norm": 19948.81640625,
+ "learning_rate": 9.999232656017613e-05,
+ "loss": 0.5649,
+ "step": 11200
+ },
+ {
+ "epoch": 0.05804324608788521,
+ "grad_norm": 20543.5859375,
+ "learning_rate": 9.999167324749443e-05,
+ "loss": 0.5598,
+ "step": 11250
+ },
+ {
+ "epoch": 0.058301216070498034,
+ "grad_norm": 20948.060546875,
+ "learning_rate": 9.99909932502256e-05,
+ "loss": 0.5631,
+ "step": 11300
+ },
+ {
+ "epoch": 0.05855918605311086,
+ "grad_norm": 20384.732421875,
+ "learning_rate": 9.999028656873257e-05,
+ "loss": 0.5592,
+ "step": 11350
+ },
+ {
+ "epoch": 0.05881715603572368,
+ "grad_norm": 20027.615234375,
+ "learning_rate": 9.99895532033926e-05,
+ "loss": 0.5658,
+ "step": 11400
+ },
+ {
+ "epoch": 0.059075126018336505,
+ "grad_norm": 20702.263671875,
+ "learning_rate": 9.99887931545971e-05,
+ "loss": 0.56,
+ "step": 11450
+ },
+ {
+ "epoch": 0.05933309600094933,
+ "grad_norm": 21589.52734375,
+ "learning_rate": 9.99880064227518e-05,
+ "loss": 0.5595,
+ "step": 11500
+ },
+ {
+ "epoch": 0.059591065983562154,
+ "grad_norm": 20375.181640625,
+ "learning_rate": 9.998719300827663e-05,
+ "loss": 0.5627,
+ "step": 11550
+ },
+ {
+ "epoch": 0.059849035966174975,
+ "grad_norm": 20207.677734375,
+ "learning_rate": 9.998635291160577e-05,
+ "loss": 0.5615,
+ "step": 11600
+ },
+ {
+ "epoch": 0.060107005948787796,
+ "grad_norm": 20898.291015625,
+ "learning_rate": 9.998548613318767e-05,
+ "loss": 0.5594,
+ "step": 11650
+ },
+ {
+ "epoch": 0.060364975931400625,
+ "grad_norm": 20133.822265625,
+ "learning_rate": 9.998459267348497e-05,
+ "loss": 0.5631,
+ "step": 11700
+ },
+ {
+ "epoch": 0.060622945914013446,
+ "grad_norm": 19021.533203125,
+ "learning_rate": 9.99836725329746e-05,
+ "loss": 0.5576,
+ "step": 11750
+ },
+ {
+ "epoch": 0.06088091589662627,
+ "grad_norm": 19088.32421875,
+ "learning_rate": 9.998272571214772e-05,
+ "loss": 0.5619,
+ "step": 11800
+ },
+ {
+ "epoch": 0.061138885879239095,
+ "grad_norm": 19742.841796875,
+ "learning_rate": 9.99817522115097e-05,
+ "loss": 0.5626,
+ "step": 11850
+ },
+ {
+ "epoch": 0.061396855861851916,
+ "grad_norm": 21584.271484375,
+ "learning_rate": 9.99807520315802e-05,
+ "loss": 0.555,
+ "step": 11900
+ },
+ {
+ "epoch": 0.06165482584446474,
+ "grad_norm": 19766.76953125,
+ "learning_rate": 9.997972517289309e-05,
+ "loss": 0.5584,
+ "step": 11950
+ },
+ {
+ "epoch": 0.06191279582707756,
+ "grad_norm": 19821.556640625,
+ "learning_rate": 9.997867163599646e-05,
+ "loss": 0.5623,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06217076580969039,
+ "grad_norm": 19488.490234375,
+ "learning_rate": 9.997759142145271e-05,
+ "loss": 0.5591,
+ "step": 12050
+ },
+ {
+ "epoch": 0.06242873579230321,
+ "grad_norm": 20093.806640625,
+ "learning_rate": 9.997648452983842e-05,
+ "loss": 0.5597,
+ "step": 12100
+ },
+ {
+ "epoch": 0.06268670577491603,
+ "grad_norm": 20202.154296875,
+ "learning_rate": 9.997535096174441e-05,
+ "loss": 0.5542,
+ "step": 12150
+ },
+ {
+ "epoch": 0.06294467575752885,
+ "grad_norm": 19978.154296875,
+ "learning_rate": 9.99741907177758e-05,
+ "loss": 0.5629,
+ "step": 12200
+ },
+ {
+ "epoch": 0.06320264574014167,
+ "grad_norm": 19697.005859375,
+ "learning_rate": 9.997300379855186e-05,
+ "loss": 0.5571,
+ "step": 12250
+ },
+ {
+ "epoch": 0.06346061572275451,
+ "grad_norm": 20384.287109375,
+ "learning_rate": 9.997179020470618e-05,
+ "loss": 0.5526,
+ "step": 12300
+ },
+ {
+ "epoch": 0.06371858570536733,
+ "grad_norm": 18652.044921875,
+ "learning_rate": 9.997054993688651e-05,
+ "loss": 0.5531,
+ "step": 12350
+ },
+ {
+ "epoch": 0.06397655568798015,
+ "grad_norm": 20133.990234375,
+ "learning_rate": 9.996928299575493e-05,
+ "loss": 0.5561,
+ "step": 12400
+ },
+ {
+ "epoch": 0.06423452567059297,
+ "grad_norm": 20575.875,
+ "learning_rate": 9.996798938198766e-05,
+ "loss": 0.5559,
+ "step": 12450
+ },
+ {
+ "epoch": 0.06449249565320579,
+ "grad_norm": 19524.828125,
+ "learning_rate": 9.996666909627525e-05,
+ "loss": 0.5437,
+ "step": 12500
+ },
+ {
+ "epoch": 0.06475046563581861,
+ "grad_norm": 22106.927734375,
+ "learning_rate": 9.996532213932242e-05,
+ "loss": 0.5691,
+ "step": 12550
+ },
+ {
+ "epoch": 0.06500843561843143,
+ "grad_norm": 18443.4609375,
+ "learning_rate": 9.996394851184814e-05,
+ "loss": 0.553,
+ "step": 12600
+ },
+ {
+ "epoch": 0.06526640560104426,
+ "grad_norm": 21786.943359375,
+ "learning_rate": 9.996254821458565e-05,
+ "loss": 0.562,
+ "step": 12650
+ },
+ {
+ "epoch": 0.06552437558365709,
+ "grad_norm": 22699.578125,
+ "learning_rate": 9.996112124828241e-05,
+ "loss": 0.5526,
+ "step": 12700
+ },
+ {
+ "epoch": 0.06578234556626991,
+ "grad_norm": 18522.822265625,
+ "learning_rate": 9.995966761370006e-05,
+ "loss": 0.5525,
+ "step": 12750
+ },
+ {
+ "epoch": 0.06604031554888273,
+ "grad_norm": 19723.44140625,
+ "learning_rate": 9.995818731161458e-05,
+ "loss": 0.5555,
+ "step": 12800
+ },
+ {
+ "epoch": 0.06629828553149555,
+ "grad_norm": 20643.173828125,
+ "learning_rate": 9.995668034281606e-05,
+ "loss": 0.5506,
+ "step": 12850
+ },
+ {
+ "epoch": 0.06655625551410838,
+ "grad_norm": 19303.68359375,
+ "learning_rate": 9.995514670810896e-05,
+ "loss": 0.5599,
+ "step": 12900
+ },
+ {
+ "epoch": 0.0668142254967212,
+ "grad_norm": 19837.240234375,
+ "learning_rate": 9.995358640831187e-05,
+ "loss": 0.5514,
+ "step": 12950
+ },
+ {
+ "epoch": 0.06707219547933402,
+ "grad_norm": 19212.25390625,
+ "learning_rate": 9.995199944425764e-05,
+ "loss": 0.5542,
+ "step": 13000
+ },
+ {
+ "epoch": 0.06733016546194685,
+ "grad_norm": 19908.70703125,
+ "learning_rate": 9.995038581679337e-05,
+ "loss": 0.5421,
+ "step": 13050
+ },
+ {
+ "epoch": 0.06758813544455967,
+ "grad_norm": 18933.306640625,
+ "learning_rate": 9.994874552678038e-05,
+ "loss": 0.549,
+ "step": 13100
+ },
+ {
+ "epoch": 0.0678461054271725,
+ "grad_norm": 19313.990234375,
+ "learning_rate": 9.994707857509422e-05,
+ "loss": 0.5569,
+ "step": 13150
+ },
+ {
+ "epoch": 0.06810407540978532,
+ "grad_norm": 20800.984375,
+ "learning_rate": 9.99453849626247e-05,
+ "loss": 0.5518,
+ "step": 13200
+ },
+ {
+ "epoch": 0.06836204539239814,
+ "grad_norm": 18623.361328125,
+ "learning_rate": 9.994366469027583e-05,
+ "loss": 0.5549,
+ "step": 13250
+ },
+ {
+ "epoch": 0.06862001537501096,
+ "grad_norm": 19761.654296875,
+ "learning_rate": 9.994191775896584e-05,
+ "loss": 0.5467,
+ "step": 13300
+ },
+ {
+ "epoch": 0.06887798535762378,
+ "grad_norm": 20618.501953125,
+ "learning_rate": 9.994014416962723e-05,
+ "loss": 0.5554,
+ "step": 13350
+ },
+ {
+ "epoch": 0.06913595534023662,
+ "grad_norm": 19279.791015625,
+ "learning_rate": 9.993834392320668e-05,
+ "loss": 0.5567,
+ "step": 13400
+ },
+ {
+ "epoch": 0.06939392532284944,
+ "grad_norm": 18802.34375,
+ "learning_rate": 9.993651702066516e-05,
+ "loss": 0.5608,
+ "step": 13450
+ },
+ {
+ "epoch": 0.06965189530546226,
+ "grad_norm": 20132.15625,
+ "learning_rate": 9.993466346297779e-05,
+ "loss": 0.547,
+ "step": 13500
+ },
+ {
+ "epoch": 0.06990986528807508,
+ "grad_norm": 19165.26171875,
+ "learning_rate": 9.993278325113403e-05,
+ "loss": 0.5485,
+ "step": 13550
+ },
+ {
+ "epoch": 0.0701678352706879,
+ "grad_norm": 18493.01171875,
+ "learning_rate": 9.993087638613743e-05,
+ "loss": 0.5455,
+ "step": 13600
+ },
+ {
+ "epoch": 0.07042580525330072,
+ "grad_norm": 18225.78125,
+ "learning_rate": 9.992894286900589e-05,
+ "loss": 0.5499,
+ "step": 13650
+ },
+ {
+ "epoch": 0.07068377523591354,
+ "grad_norm": 20189.802734375,
+ "learning_rate": 9.992698270077146e-05,
+ "loss": 0.5468,
+ "step": 13700
+ },
+ {
+ "epoch": 0.07094174521852638,
+ "grad_norm": 20861.2734375,
+ "learning_rate": 9.992499588248043e-05,
+ "loss": 0.5588,
+ "step": 13750
+ },
+ {
+ "epoch": 0.0711997152011392,
+ "grad_norm": 19876.689453125,
+ "learning_rate": 9.992298241519335e-05,
+ "loss": 0.5486,
+ "step": 13800
+ },
+ {
+ "epoch": 0.07145768518375202,
+ "grad_norm": 18371.142578125,
+ "learning_rate": 9.992094229998497e-05,
+ "loss": 0.5475,
+ "step": 13850
+ },
+ {
+ "epoch": 0.07171565516636484,
+ "grad_norm": 18274.396484375,
+ "learning_rate": 9.991887553794423e-05,
+ "loss": 0.549,
+ "step": 13900
+ },
+ {
+ "epoch": 0.07197362514897766,
+ "grad_norm": 18204.947265625,
+ "learning_rate": 9.991678213017437e-05,
+ "loss": 0.5419,
+ "step": 13950
+ },
+ {
+ "epoch": 0.07223159513159048,
+ "grad_norm": 18634.162109375,
+ "learning_rate": 9.991466207779278e-05,
+ "loss": 0.5528,
+ "step": 14000
+ },
+ {
+ "epoch": 0.0724895651142033,
+ "grad_norm": 21840.685546875,
+ "learning_rate": 9.991251538193112e-05,
+ "loss": 0.5492,
+ "step": 14050
+ },
+ {
+ "epoch": 0.07274753509681614,
+ "grad_norm": 18888.935546875,
+ "learning_rate": 9.991034204373524e-05,
+ "loss": 0.5504,
+ "step": 14100
+ },
+ {
+ "epoch": 0.07300550507942896,
+ "grad_norm": 19353.263671875,
+ "learning_rate": 9.990814206436524e-05,
+ "loss": 0.5425,
+ "step": 14150
+ },
+ {
+ "epoch": 0.07326347506204178,
+ "grad_norm": 18891.79296875,
+ "learning_rate": 9.990591544499543e-05,
+ "loss": 0.551,
+ "step": 14200
+ },
+ {
+ "epoch": 0.0735214450446546,
+ "grad_norm": 17878.33203125,
+ "learning_rate": 9.99036621868143e-05,
+ "loss": 0.5403,
+ "step": 14250
+ },
+ {
+ "epoch": 0.07377941502726743,
+ "grad_norm": 18997.544921875,
+ "learning_rate": 9.990138229102465e-05,
+ "loss": 0.5458,
+ "step": 14300
+ },
+ {
+ "epoch": 0.07403738500988025,
+ "grad_norm": 22162.03125,
+ "learning_rate": 9.989907575884341e-05,
+ "loss": 0.5482,
+ "step": 14350
+ },
+ {
+ "epoch": 0.07429535499249307,
+ "grad_norm": 17026.828125,
+ "learning_rate": 9.989674259150177e-05,
+ "loss": 0.5487,
+ "step": 14400
+ },
+ {
+ "epoch": 0.0745533249751059,
+ "grad_norm": 18335.169921875,
+ "learning_rate": 9.989438279024513e-05,
+ "loss": 0.5459,
+ "step": 14450
+ },
+ {
+ "epoch": 0.07481129495771872,
+ "grad_norm": 19508.666015625,
+ "learning_rate": 9.989199635633309e-05,
+ "loss": 0.5456,
+ "step": 14500
+ },
+ {
+ "epoch": 0.07506926494033155,
+ "grad_norm": 20281.28515625,
+ "learning_rate": 9.98895832910395e-05,
+ "loss": 0.5455,
+ "step": 14550
+ },
+ {
+ "epoch": 0.07532723492294437,
+ "grad_norm": 20196.259765625,
+ "learning_rate": 9.98871435956524e-05,
+ "loss": 0.5474,
+ "step": 14600
+ },
+ {
+ "epoch": 0.07558520490555719,
+ "grad_norm": 18934.544921875,
+ "learning_rate": 9.988467727147409e-05,
+ "loss": 0.546,
+ "step": 14650
+ },
+ {
+ "epoch": 0.07584317488817001,
+ "grad_norm": 20257.126953125,
+ "learning_rate": 9.988218431982098e-05,
+ "loss": 0.5443,
+ "step": 14700
+ },
+ {
+ "epoch": 0.07610114487078283,
+ "grad_norm": 20330.86328125,
+ "learning_rate": 9.98796647420238e-05,
+ "loss": 0.5423,
+ "step": 14750
+ },
+ {
+ "epoch": 0.07635911485339567,
+ "grad_norm": 19077.765625,
+ "learning_rate": 9.987711853942745e-05,
+ "loss": 0.5446,
+ "step": 14800
+ },
+ {
+ "epoch": 0.07661708483600849,
+ "grad_norm": 20855.169921875,
+ "learning_rate": 9.987454571339103e-05,
+ "loss": 0.5427,
+ "step": 14850
+ },
+ {
+ "epoch": 0.07687505481862131,
+ "grad_norm": 20556.005859375,
+ "learning_rate": 9.987194626528788e-05,
+ "loss": 0.5417,
+ "step": 14900
+ },
+ {
+ "epoch": 0.07713302480123413,
+ "grad_norm": 19028.7421875,
+ "learning_rate": 9.986932019650553e-05,
+ "loss": 0.5412,
+ "step": 14950
+ },
+ {
+ "epoch": 0.07739099478384695,
+ "grad_norm": 18669.166015625,
+ "learning_rate": 9.986666750844572e-05,
+ "loss": 0.5404,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07739099478384695,
+ "eval_loss": 0.5350670218467712,
+ "eval_runtime": 3217.7876,
+ "eval_samples_per_second": 963.743,
+ "eval_steps_per_second": 1.882,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07764896476645977,
+ "grad_norm": 19965.779296875,
+ "learning_rate": 9.98639882025244e-05,
+ "loss": 0.5439,
+ "step": 15050
+ },
+ {
+ "epoch": 0.07790693474907259,
+ "grad_norm": 18329.9921875,
+ "learning_rate": 9.986128228017173e-05,
+ "loss": 0.5425,
+ "step": 15100
+ },
+ {
+ "epoch": 0.07816490473168543,
+ "grad_norm": 20102.005859375,
+ "learning_rate": 9.985854974283211e-05,
+ "loss": 0.5444,
+ "step": 15150
+ },
+ {
+ "epoch": 0.07842287471429825,
+ "grad_norm": 19234.671875,
+ "learning_rate": 9.985579059196406e-05,
+ "loss": 0.5443,
+ "step": 15200
+ },
+ {
+ "epoch": 0.07868084469691107,
+ "grad_norm": 18324.298828125,
+ "learning_rate": 9.985300482904041e-05,
+ "loss": 0.5419,
+ "step": 15250
+ },
+ {
+ "epoch": 0.07893881467952389,
+ "grad_norm": 18766.2734375,
+ "learning_rate": 9.985019245554814e-05,
+ "loss": 0.5412,
+ "step": 15300
+ },
+ {
+ "epoch": 0.07919678466213671,
+ "grad_norm": 18805.765625,
+ "learning_rate": 9.984735347298841e-05,
+ "loss": 0.5443,
+ "step": 15350
+ },
+ {
+ "epoch": 0.07945475464474953,
+ "grad_norm": 17677.30078125,
+ "learning_rate": 9.984448788287665e-05,
+ "loss": 0.5421,
+ "step": 15400
+ },
+ {
+ "epoch": 0.07971272462736236,
+ "grad_norm": 19851.3515625,
+ "learning_rate": 9.984159568674243e-05,
+ "loss": 0.5426,
+ "step": 15450
+ },
+ {
+ "epoch": 0.07997069460997518,
+ "grad_norm": 18453.05859375,
+ "learning_rate": 9.983867688612956e-05,
+ "loss": 0.5445,
+ "step": 15500
+ },
+ {
+ "epoch": 0.08022866459258801,
+ "grad_norm": 17366.869140625,
+ "learning_rate": 9.983573148259603e-05,
+ "loss": 0.5451,
+ "step": 15550
+ },
+ {
+ "epoch": 0.08048663457520083,
+ "grad_norm": 18628.716796875,
+ "learning_rate": 9.983275947771407e-05,
+ "loss": 0.5373,
+ "step": 15600
+ },
+ {
+ "epoch": 0.08074460455781365,
+ "grad_norm": 19403.87890625,
+ "learning_rate": 9.982976087307003e-05,
+ "loss": 0.5489,
+ "step": 15650
+ },
+ {
+ "epoch": 0.08100257454042648,
+ "grad_norm": 18485.71875,
+ "learning_rate": 9.982673567026455e-05,
+ "loss": 0.538,
+ "step": 15700
+ },
+ {
+ "epoch": 0.0812605445230393,
+ "grad_norm": 19837.1796875,
+ "learning_rate": 9.982368387091241e-05,
+ "loss": 0.5356,
+ "step": 15750
+ },
+ {
+ "epoch": 0.08151851450565212,
+ "grad_norm": 19505.34375,
+ "learning_rate": 9.982060547664258e-05,
+ "loss": 0.5356,
+ "step": 15800
+ },
+ {
+ "epoch": 0.08177648448826494,
+ "grad_norm": 18645.48828125,
+ "learning_rate": 9.981750048909828e-05,
+ "loss": 0.5381,
+ "step": 15850
+ },
+ {
+ "epoch": 0.08203445447087777,
+ "grad_norm": 20191.73828125,
+ "learning_rate": 9.981436890993689e-05,
+ "loss": 0.535,
+ "step": 15900
+ },
+ {
+ "epoch": 0.0822924244534906,
+ "grad_norm": 18908.15625,
+ "learning_rate": 9.981121074082995e-05,
+ "loss": 0.5405,
+ "step": 15950
+ },
+ {
+ "epoch": 0.08255039443610342,
+ "grad_norm": 19517.73828125,
+ "learning_rate": 9.980802598346326e-05,
+ "loss": 0.5407,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08280836441871624,
+ "grad_norm": 18368.16015625,
+ "learning_rate": 9.980481463953679e-05,
+ "loss": 0.5391,
+ "step": 16050
+ },
+ {
+ "epoch": 0.08306633440132906,
+ "grad_norm": 19727.35546875,
+ "learning_rate": 9.980157671076466e-05,
+ "loss": 0.537,
+ "step": 16100
+ },
+ {
+ "epoch": 0.08332430438394188,
+ "grad_norm": 20757.890625,
+ "learning_rate": 9.979831219887525e-05,
+ "loss": 0.5408,
+ "step": 16150
+ },
+ {
+ "epoch": 0.0835822743665547,
+ "grad_norm": 19334.708984375,
+ "learning_rate": 9.979502110561108e-05,
+ "loss": 0.5371,
+ "step": 16200
+ },
+ {
+ "epoch": 0.08384024434916754,
+ "grad_norm": 19338.498046875,
+ "learning_rate": 9.979170343272886e-05,
+ "loss": 0.531,
+ "step": 16250
+ },
+ {
+ "epoch": 0.08409821433178036,
+ "grad_norm": 18722.365234375,
+ "learning_rate": 9.978835918199949e-05,
+ "loss": 0.5398,
+ "step": 16300
+ },
+ {
+ "epoch": 0.08435618431439318,
+ "grad_norm": 18026.109375,
+ "learning_rate": 9.97849883552081e-05,
+ "loss": 0.5423,
+ "step": 16350
+ },
+ {
+ "epoch": 0.084614154297006,
+ "grad_norm": 19646.78125,
+ "learning_rate": 9.978159095415396e-05,
+ "loss": 0.5387,
+ "step": 16400
+ },
+ {
+ "epoch": 0.08487212427961882,
+ "grad_norm": 20091.552734375,
+ "learning_rate": 9.977816698065052e-05,
+ "loss": 0.5376,
+ "step": 16450
+ },
+ {
+ "epoch": 0.08513009426223164,
+ "grad_norm": 20539.73046875,
+ "learning_rate": 9.977471643652546e-05,
+ "loss": 0.5333,
+ "step": 16500
+ },
+ {
+ "epoch": 0.08538806424484446,
+ "grad_norm": 18306.24609375,
+ "learning_rate": 9.977123932362059e-05,
+ "loss": 0.5405,
+ "step": 16550
+ },
+ {
+ "epoch": 0.0856460342274573,
+ "grad_norm": 20133.513671875,
+ "learning_rate": 9.976773564379193e-05,
+ "loss": 0.541,
+ "step": 16600
+ },
+ {
+ "epoch": 0.08590400421007012,
+ "grad_norm": 19533.50390625,
+ "learning_rate": 9.976420539890969e-05,
+ "loss": 0.5333,
+ "step": 16650
+ },
+ {
+ "epoch": 0.08616197419268294,
+ "grad_norm": 19509.087890625,
+ "learning_rate": 9.976064859085822e-05,
+ "loss": 0.5347,
+ "step": 16700
+ },
+ {
+ "epoch": 0.08641994417529576,
+ "grad_norm": 19590.818359375,
+ "learning_rate": 9.97570652215361e-05,
+ "loss": 0.5377,
+ "step": 16750
+ },
+ {
+ "epoch": 0.08667791415790858,
+ "grad_norm": 19510.705078125,
+ "learning_rate": 9.975345529285605e-05,
+ "loss": 0.5367,
+ "step": 16800
+ },
+ {
+ "epoch": 0.0869358841405214,
+ "grad_norm": 20015.8046875,
+ "learning_rate": 9.974981880674499e-05,
+ "loss": 0.5386,
+ "step": 16850
+ },
+ {
+ "epoch": 0.08719385412313423,
+ "grad_norm": 18704.03125,
+ "learning_rate": 9.974615576514399e-05,
+ "loss": 0.5361,
+ "step": 16900
+ },
+ {
+ "epoch": 0.08745182410574706,
+ "grad_norm": 18257.869140625,
+ "learning_rate": 9.974246617000832e-05,
+ "loss": 0.5304,
+ "step": 16950
+ },
+ {
+ "epoch": 0.08770979408835988,
+ "grad_norm": 18150.517578125,
+ "learning_rate": 9.973875002330743e-05,
+ "loss": 0.5289,
+ "step": 17000
+ },
+ {
+ "epoch": 0.0879677640709727,
+ "grad_norm": 18326.041015625,
+ "learning_rate": 9.97350073270249e-05,
+ "loss": 0.5347,
+ "step": 17050
+ },
+ {
+ "epoch": 0.08822573405358553,
+ "grad_norm": 18199.224609375,
+ "learning_rate": 9.973123808315852e-05,
+ "loss": 0.5269,
+ "step": 17100
+ },
+ {
+ "epoch": 0.08848370403619835,
+ "grad_norm": 20351.447265625,
+ "learning_rate": 9.972744229372025e-05,
+ "loss": 0.5334,
+ "step": 17150
+ },
+ {
+ "epoch": 0.08874167401881117,
+ "grad_norm": 19200.703125,
+ "learning_rate": 9.97236199607362e-05,
+ "loss": 0.5316,
+ "step": 17200
+ },
+ {
+ "epoch": 0.08899964400142399,
+ "grad_norm": 18855.7890625,
+ "learning_rate": 9.971977108624664e-05,
+ "loss": 0.5342,
+ "step": 17250
+ },
+ {
+ "epoch": 0.08925761398403682,
+ "grad_norm": 18889.56640625,
+ "learning_rate": 9.971589567230606e-05,
+ "loss": 0.5361,
+ "step": 17300
+ },
+ {
+ "epoch": 0.08951558396664965,
+ "grad_norm": 18003.9921875,
+ "learning_rate": 9.971199372098304e-05,
+ "loss": 0.5353,
+ "step": 17350
+ },
+ {
+ "epoch": 0.08977355394926247,
+ "grad_norm": 19555.30078125,
+ "learning_rate": 9.970806523436041e-05,
+ "loss": 0.5306,
+ "step": 17400
+ },
+ {
+ "epoch": 0.09003152393187529,
+ "grad_norm": 19433.37890625,
+ "learning_rate": 9.97041102145351e-05,
+ "loss": 0.5341,
+ "step": 17450
+ },
+ {
+ "epoch": 0.09028949391448811,
+ "grad_norm": 19238.341796875,
+ "learning_rate": 9.97001286636182e-05,
+ "loss": 0.5372,
+ "step": 17500
+ },
+ {
+ "epoch": 0.09054746389710093,
+ "grad_norm": 18698.78125,
+ "learning_rate": 9.969612058373502e-05,
+ "loss": 0.5356,
+ "step": 17550
+ },
+ {
+ "epoch": 0.09080543387971375,
+ "grad_norm": 17953.580078125,
+ "learning_rate": 9.969208597702497e-05,
+ "loss": 0.529,
+ "step": 17600
+ },
+ {
+ "epoch": 0.09106340386232659,
+ "grad_norm": 17678.716796875,
+ "learning_rate": 9.968802484564168e-05,
+ "loss": 0.5329,
+ "step": 17650
+ },
+ {
+ "epoch": 0.09132137384493941,
+ "grad_norm": 20412.287109375,
+ "learning_rate": 9.968393719175286e-05,
+ "loss": 0.534,
+ "step": 17700
+ },
+ {
+ "epoch": 0.09157934382755223,
+ "grad_norm": 20080.16015625,
+ "learning_rate": 9.967982301754044e-05,
+ "loss": 0.5307,
+ "step": 17750
+ },
+ {
+ "epoch": 0.09183731381016505,
+ "grad_norm": 18570.314453125,
+ "learning_rate": 9.96756823252005e-05,
+ "loss": 0.526,
+ "step": 17800
+ },
+ {
+ "epoch": 0.09209528379277787,
+ "grad_norm": 18329.107421875,
+ "learning_rate": 9.967151511694324e-05,
+ "loss": 0.5273,
+ "step": 17850
+ },
+ {
+ "epoch": 0.09235325377539069,
+ "grad_norm": 19036.18359375,
+ "learning_rate": 9.966732139499304e-05,
+ "loss": 0.5275,
+ "step": 17900
+ },
+ {
+ "epoch": 0.09261122375800351,
+ "grad_norm": 18708.826171875,
+ "learning_rate": 9.966310116158844e-05,
+ "loss": 0.5313,
+ "step": 17950
+ },
+ {
+ "epoch": 0.09286919374061635,
+ "grad_norm": 18660.791015625,
+ "learning_rate": 9.96588544189821e-05,
+ "loss": 0.5303,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09312716372322917,
+ "grad_norm": 19709.181640625,
+ "learning_rate": 9.965458116944086e-05,
+ "loss": 0.5347,
+ "step": 18050
+ },
+ {
+ "epoch": 0.09338513370584199,
+ "grad_norm": 19683.798828125,
+ "learning_rate": 9.96502814152457e-05,
+ "loss": 0.5359,
+ "step": 18100
+ },
+ {
+ "epoch": 0.09364310368845481,
+ "grad_norm": 19533.09765625,
+ "learning_rate": 9.964595515869175e-05,
+ "loss": 0.5263,
+ "step": 18150
+ },
+ {
+ "epoch": 0.09390107367106763,
+ "grad_norm": 20254.892578125,
+ "learning_rate": 9.964160240208826e-05,
+ "loss": 0.5307,
+ "step": 18200
+ },
+ {
+ "epoch": 0.09415904365368045,
+ "grad_norm": 21316.876953125,
+ "learning_rate": 9.963722314775868e-05,
+ "loss": 0.5316,
+ "step": 18250
+ },
+ {
+ "epoch": 0.09441701363629328,
+ "grad_norm": 20027.03515625,
+ "learning_rate": 9.963281739804054e-05,
+ "loss": 0.5274,
+ "step": 18300
+ },
+ {
+ "epoch": 0.0946749836189061,
+ "grad_norm": 18551.994140625,
+ "learning_rate": 9.962838515528554e-05,
+ "loss": 0.5339,
+ "step": 18350
+ },
+ {
+ "epoch": 0.09493295360151893,
+ "grad_norm": 17779.97265625,
+ "learning_rate": 9.962392642185956e-05,
+ "loss": 0.5301,
+ "step": 18400
+ },
+ {
+ "epoch": 0.09519092358413175,
+ "grad_norm": 20620.232421875,
+ "learning_rate": 9.961944120014256e-05,
+ "loss": 0.522,
+ "step": 18450
+ },
+ {
+ "epoch": 0.09544889356674457,
+ "grad_norm": 18669.73046875,
+ "learning_rate": 9.961492949252868e-05,
+ "loss": 0.5261,
+ "step": 18500
+ },
+ {
+ "epoch": 0.0957068635493574,
+ "grad_norm": 19528.4765625,
+ "learning_rate": 9.961039130142617e-05,
+ "loss": 0.5276,
+ "step": 18550
+ },
+ {
+ "epoch": 0.09596483353197022,
+ "grad_norm": 19643.099609375,
+ "learning_rate": 9.960582662925744e-05,
+ "loss": 0.5332,
+ "step": 18600
+ },
+ {
+ "epoch": 0.09622280351458304,
+ "grad_norm": 19024.4375,
+ "learning_rate": 9.960123547845901e-05,
+ "loss": 0.529,
+ "step": 18650
+ },
+ {
+ "epoch": 0.09648077349719586,
+ "grad_norm": 20228.248046875,
+ "learning_rate": 9.959661785148155e-05,
+ "loss": 0.5322,
+ "step": 18700
+ },
+ {
+ "epoch": 0.0967387434798087,
+ "grad_norm": 20120.126953125,
+ "learning_rate": 9.959197375078986e-05,
+ "loss": 0.5256,
+ "step": 18750
+ },
+ {
+ "epoch": 0.09699671346242152,
+ "grad_norm": 19894.423828125,
+ "learning_rate": 9.95873031788629e-05,
+ "loss": 0.5257,
+ "step": 18800
+ },
+ {
+ "epoch": 0.09725468344503434,
+ "grad_norm": 18450.8671875,
+ "learning_rate": 9.958260613819367e-05,
+ "loss": 0.5268,
+ "step": 18850
+ },
+ {
+ "epoch": 0.09751265342764716,
+ "grad_norm": 22775.53125,
+ "learning_rate": 9.95778826312894e-05,
+ "loss": 0.5293,
+ "step": 18900
+ },
+ {
+ "epoch": 0.09777062341025998,
+ "grad_norm": 17769.38671875,
+ "learning_rate": 9.95731326606714e-05,
+ "loss": 0.5281,
+ "step": 18950
+ },
+ {
+ "epoch": 0.0980285933928728,
+ "grad_norm": 20731.322265625,
+ "learning_rate": 9.956835622887514e-05,
+ "loss": 0.5327,
+ "step": 19000
+ },
+ {
+ "epoch": 0.09828656337548562,
+ "grad_norm": 20059.11328125,
+ "learning_rate": 9.956355333845014e-05,
+ "loss": 0.5279,
+ "step": 19050
+ },
+ {
+ "epoch": 0.09854453335809846,
+ "grad_norm": 17477.626953125,
+ "learning_rate": 9.955872399196012e-05,
+ "loss": 0.5257,
+ "step": 19100
+ },
+ {
+ "epoch": 0.09880250334071128,
+ "grad_norm": 20293.232421875,
+ "learning_rate": 9.955386819198287e-05,
+ "loss": 0.5258,
+ "step": 19150
+ },
+ {
+ "epoch": 0.0990604733233241,
+ "grad_norm": 19330.4140625,
+ "learning_rate": 9.954898594111035e-05,
+ "loss": 0.5231,
+ "step": 19200
+ },
+ {
+ "epoch": 0.09931844330593692,
+ "grad_norm": 19410.818359375,
+ "learning_rate": 9.954407724194858e-05,
+ "loss": 0.5286,
+ "step": 19250
+ },
+ {
+ "epoch": 0.09957641328854974,
+ "grad_norm": 18320.552734375,
+ "learning_rate": 9.953914209711775e-05,
+ "loss": 0.5287,
+ "step": 19300
+ },
+ {
+ "epoch": 0.09983438327116256,
+ "grad_norm": 17585.583984375,
+ "learning_rate": 9.953418050925213e-05,
+ "loss": 0.5265,
+ "step": 19350
+ },
+ {
+ "epoch": 0.10009235325377538,
+ "grad_norm": 20318.298828125,
+ "learning_rate": 9.952919248100012e-05,
+ "loss": 0.5292,
+ "step": 19400
+ },
+ {
+ "epoch": 0.10035032323638822,
+ "grad_norm": 20239.33984375,
+ "learning_rate": 9.952417801502426e-05,
+ "loss": 0.522,
+ "step": 19450
+ },
+ {
+ "epoch": 0.10060829321900104,
+ "grad_norm": 18922.158203125,
+ "learning_rate": 9.951913711400115e-05,
+ "loss": 0.5275,
+ "step": 19500
+ },
+ {
+ "epoch": 0.10086626320161386,
+ "grad_norm": 18332.673828125,
+ "learning_rate": 9.951406978062153e-05,
+ "loss": 0.5282,
+ "step": 19550
+ },
+ {
+ "epoch": 0.10112423318422668,
+ "grad_norm": 19321.662109375,
+ "learning_rate": 9.950897601759024e-05,
+ "loss": 0.5236,
+ "step": 19600
+ },
+ {
+ "epoch": 0.1013822031668395,
+ "grad_norm": 19050.42578125,
+ "learning_rate": 9.950385582762624e-05,
+ "loss": 0.5269,
+ "step": 19650
+ },
+ {
+ "epoch": 0.10164017314945233,
+ "grad_norm": 18592.8125,
+ "learning_rate": 9.949870921346259e-05,
+ "loss": 0.5294,
+ "step": 19700
+ },
+ {
+ "epoch": 0.10189814313206515,
+ "grad_norm": 17702.080078125,
+ "learning_rate": 9.949353617784644e-05,
+ "loss": 0.5321,
+ "step": 19750
+ },
+ {
+ "epoch": 0.10215611311467798,
+ "grad_norm": 18935.71875,
+ "learning_rate": 9.948833672353907e-05,
+ "loss": 0.5279,
+ "step": 19800
+ },
+ {
+ "epoch": 0.1024140830972908,
+ "grad_norm": 19814.96484375,
+ "learning_rate": 9.948311085331585e-05,
+ "loss": 0.5174,
+ "step": 19850
+ },
+ {
+ "epoch": 0.10267205307990362,
+ "grad_norm": 18945.4375,
+ "learning_rate": 9.947785856996623e-05,
+ "loss": 0.525,
+ "step": 19900
+ },
+ {
+ "epoch": 0.10293002306251645,
+ "grad_norm": 19162.28125,
+ "learning_rate": 9.947257987629379e-05,
+ "loss": 0.5268,
+ "step": 19950
+ },
+ {
+ "epoch": 0.10318799304512927,
+ "grad_norm": 18814.861328125,
+ "learning_rate": 9.94672747751162e-05,
+ "loss": 0.5191,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10318799304512927,
+ "eval_loss": 0.5160176157951355,
+ "eval_runtime": 3272.5369,
+ "eval_samples_per_second": 947.62,
+ "eval_steps_per_second": 1.851,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10344596302774209,
+ "grad_norm": 19089.77734375,
+ "learning_rate": 9.94619432692652e-05,
+ "loss": 0.5254,
+ "step": 20050
+ },
+ {
+ "epoch": 0.10370393301035491,
+ "grad_norm": 19005.53125,
+ "learning_rate": 9.945658536158667e-05,
+ "loss": 0.525,
+ "step": 20100
+ },
+ {
+ "epoch": 0.10396190299296774,
+ "grad_norm": 20896.8125,
+ "learning_rate": 9.945120105494054e-05,
+ "loss": 0.5173,
+ "step": 20150
+ },
+ {
+ "epoch": 0.10421987297558057,
+ "grad_norm": 19254.22265625,
+ "learning_rate": 9.944579035220085e-05,
+ "loss": 0.5195,
+ "step": 20200
+ },
+ {
+ "epoch": 0.10447784295819339,
+ "grad_norm": 19317.572265625,
+ "learning_rate": 9.944035325625573e-05,
+ "loss": 0.5239,
+ "step": 20250
+ },
+ {
+ "epoch": 0.10473581294080621,
+ "grad_norm": 18661.330078125,
+ "learning_rate": 9.94348897700074e-05,
+ "loss": 0.5243,
+ "step": 20300
+ },
+ {
+ "epoch": 0.10499378292341903,
+ "grad_norm": 18914.298828125,
+ "learning_rate": 9.942939989637216e-05,
+ "loss": 0.5247,
+ "step": 20350
+ },
+ {
+ "epoch": 0.10525175290603185,
+ "grad_norm": 17788.77734375,
+ "learning_rate": 9.942388363828041e-05,
+ "loss": 0.5205,
+ "step": 20400
+ },
+ {
+ "epoch": 0.10550972288864467,
+ "grad_norm": 17314.578125,
+ "learning_rate": 9.941834099867659e-05,
+ "loss": 0.5182,
+ "step": 20450
+ },
+ {
+ "epoch": 0.10576769287125751,
+ "grad_norm": 18627.068359375,
+ "learning_rate": 9.941277198051931e-05,
+ "loss": 0.5208,
+ "step": 20500
+ },
+ {
+ "epoch": 0.10602566285387033,
+ "grad_norm": 18274.4609375,
+ "learning_rate": 9.940717658678113e-05,
+ "loss": 0.5244,
+ "step": 20550
+ },
+ {
+ "epoch": 0.10628363283648315,
+ "grad_norm": 18668.767578125,
+ "learning_rate": 9.940155482044884e-05,
+ "loss": 0.5237,
+ "step": 20600
+ },
+ {
+ "epoch": 0.10654160281909597,
+ "grad_norm": 17703.703125,
+ "learning_rate": 9.939590668452316e-05,
+ "loss": 0.5148,
+ "step": 20650
+ },
+ {
+ "epoch": 0.10679957280170879,
+ "grad_norm": 18372.7578125,
+ "learning_rate": 9.939023218201901e-05,
+ "loss": 0.522,
+ "step": 20700
+ },
+ {
+ "epoch": 0.10705754278432161,
+ "grad_norm": 18439.521484375,
+ "learning_rate": 9.93845313159653e-05,
+ "loss": 0.5177,
+ "step": 20750
+ },
+ {
+ "epoch": 0.10731551276693443,
+ "grad_norm": 18812.10546875,
+ "learning_rate": 9.937880408940504e-05,
+ "loss": 0.5161,
+ "step": 20800
+ },
+ {
+ "epoch": 0.10757348274954727,
+ "grad_norm": 19163.4296875,
+ "learning_rate": 9.937305050539534e-05,
+ "loss": 0.5175,
+ "step": 20850
+ },
+ {
+ "epoch": 0.10783145273216009,
+ "grad_norm": 19459.3984375,
+ "learning_rate": 9.936727056700732e-05,
+ "loss": 0.5257,
+ "step": 20900
+ },
+ {
+ "epoch": 0.10808942271477291,
+ "grad_norm": 20272.22265625,
+ "learning_rate": 9.93614642773262e-05,
+ "loss": 0.5244,
+ "step": 20950
+ },
+ {
+ "epoch": 0.10834739269738573,
+ "grad_norm": 19995.736328125,
+ "learning_rate": 9.93556316394513e-05,
+ "loss": 0.5179,
+ "step": 21000
+ },
+ {
+ "epoch": 0.10860536267999855,
+ "grad_norm": 20567.369140625,
+ "learning_rate": 9.934977265649594e-05,
+ "loss": 0.528,
+ "step": 21050
+ },
+ {
+ "epoch": 0.10886333266261138,
+ "grad_norm": 19328.57421875,
+ "learning_rate": 9.934388733158753e-05,
+ "loss": 0.5249,
+ "step": 21100
+ },
+ {
+ "epoch": 0.1091213026452242,
+ "grad_norm": 17305.19921875,
+ "learning_rate": 9.933797566786757e-05,
+ "loss": 0.5163,
+ "step": 21150
+ },
+ {
+ "epoch": 0.10937927262783702,
+ "grad_norm": 19983.99609375,
+ "learning_rate": 9.933203766849155e-05,
+ "loss": 0.5227,
+ "step": 21200
+ },
+ {
+ "epoch": 0.10963724261044985,
+ "grad_norm": 18918.16015625,
+ "learning_rate": 9.93260733366291e-05,
+ "loss": 0.521,
+ "step": 21250
+ },
+ {
+ "epoch": 0.10989521259306267,
+ "grad_norm": 19260.40625,
+ "learning_rate": 9.932008267546384e-05,
+ "loss": 0.5195,
+ "step": 21300
+ },
+ {
+ "epoch": 0.1101531825756755,
+ "grad_norm": 16713.015625,
+ "learning_rate": 9.931406568819348e-05,
+ "loss": 0.5187,
+ "step": 21350
+ },
+ {
+ "epoch": 0.11041115255828832,
+ "grad_norm": 19787.67578125,
+ "learning_rate": 9.930802237802976e-05,
+ "loss": 0.5152,
+ "step": 21400
+ },
+ {
+ "epoch": 0.11066912254090114,
+ "grad_norm": 20632.775390625,
+ "learning_rate": 9.93019527481985e-05,
+ "loss": 0.5158,
+ "step": 21450
+ },
+ {
+ "epoch": 0.11092709252351396,
+ "grad_norm": 18545.748046875,
+ "learning_rate": 9.929585680193951e-05,
+ "loss": 0.5161,
+ "step": 21500
+ },
+ {
+ "epoch": 0.11118506250612678,
+ "grad_norm": 18961.138671875,
+ "learning_rate": 9.928973454250674e-05,
+ "loss": 0.5192,
+ "step": 21550
+ },
+ {
+ "epoch": 0.11144303248873962,
+ "grad_norm": 18970.013671875,
+ "learning_rate": 9.928358597316812e-05,
+ "loss": 0.5211,
+ "step": 21600
+ },
+ {
+ "epoch": 0.11170100247135244,
+ "grad_norm": 20800.046875,
+ "learning_rate": 9.927741109720561e-05,
+ "loss": 0.5143,
+ "step": 21650
+ },
+ {
+ "epoch": 0.11195897245396526,
+ "grad_norm": 18738.564453125,
+ "learning_rate": 9.927120991791528e-05,
+ "loss": 0.5232,
+ "step": 21700
+ },
+ {
+ "epoch": 0.11221694243657808,
+ "grad_norm": 18495.798828125,
+ "learning_rate": 9.926498243860715e-05,
+ "loss": 0.5176,
+ "step": 21750
+ },
+ {
+ "epoch": 0.1124749124191909,
+ "grad_norm": 18129.375,
+ "learning_rate": 9.925872866260537e-05,
+ "loss": 0.5132,
+ "step": 21800
+ },
+ {
+ "epoch": 0.11273288240180372,
+ "grad_norm": 19332.751953125,
+ "learning_rate": 9.925244859324807e-05,
+ "loss": 0.5135,
+ "step": 21850
+ },
+ {
+ "epoch": 0.11299085238441654,
+ "grad_norm": 19395.544921875,
+ "learning_rate": 9.924614223388742e-05,
+ "loss": 0.5191,
+ "step": 21900
+ },
+ {
+ "epoch": 0.11324882236702938,
+ "grad_norm": 20292.890625,
+ "learning_rate": 9.923980958788964e-05,
+ "loss": 0.5212,
+ "step": 21950
+ },
+ {
+ "epoch": 0.1135067923496422,
+ "grad_norm": 20309.033203125,
+ "learning_rate": 9.923345065863498e-05,
+ "loss": 0.5134,
+ "step": 22000
+ },
+ {
+ "epoch": 0.11376476233225502,
+ "grad_norm": 17513.578125,
+ "learning_rate": 9.922706544951772e-05,
+ "loss": 0.5216,
+ "step": 22050
+ },
+ {
+ "epoch": 0.11402273231486784,
+ "grad_norm": 18886.10546875,
+ "learning_rate": 9.922065396394614e-05,
+ "loss": 0.5219,
+ "step": 22100
+ },
+ {
+ "epoch": 0.11428070229748066,
+ "grad_norm": 19656.1484375,
+ "learning_rate": 9.921421620534257e-05,
+ "loss": 0.5163,
+ "step": 22150
+ },
+ {
+ "epoch": 0.11453867228009348,
+ "grad_norm": 18463.068359375,
+ "learning_rate": 9.920775217714338e-05,
+ "loss": 0.5198,
+ "step": 22200
+ },
+ {
+ "epoch": 0.1147966422627063,
+ "grad_norm": 20666.400390625,
+ "learning_rate": 9.920126188279892e-05,
+ "loss": 0.5164,
+ "step": 22250
+ },
+ {
+ "epoch": 0.11505461224531914,
+ "grad_norm": 20401.681640625,
+ "learning_rate": 9.919474532577359e-05,
+ "loss": 0.5163,
+ "step": 22300
+ },
+ {
+ "epoch": 0.11531258222793196,
+ "grad_norm": 21289.541015625,
+ "learning_rate": 9.918820250954581e-05,
+ "loss": 0.5114,
+ "step": 22350
+ },
+ {
+ "epoch": 0.11557055221054478,
+ "grad_norm": 17559.50390625,
+ "learning_rate": 9.918163343760801e-05,
+ "loss": 0.5156,
+ "step": 22400
+ },
+ {
+ "epoch": 0.1158285221931576,
+ "grad_norm": 17041.087890625,
+ "learning_rate": 9.917503811346662e-05,
+ "loss": 0.5146,
+ "step": 22450
+ },
+ {
+ "epoch": 0.11608649217577043,
+ "grad_norm": 20508.087890625,
+ "learning_rate": 9.916841654064212e-05,
+ "loss": 0.5202,
+ "step": 22500
+ },
+ {
+ "epoch": 0.11634446215838325,
+ "grad_norm": 21307.646484375,
+ "learning_rate": 9.916176872266894e-05,
+ "loss": 0.5108,
+ "step": 22550
+ },
+ {
+ "epoch": 0.11660243214099607,
+ "grad_norm": 21765.580078125,
+ "learning_rate": 9.91550946630956e-05,
+ "loss": 0.5158,
+ "step": 22600
+ },
+ {
+ "epoch": 0.1168604021236089,
+ "grad_norm": 18173.646484375,
+ "learning_rate": 9.914839436548454e-05,
+ "loss": 0.5081,
+ "step": 22650
+ },
+ {
+ "epoch": 0.11711837210622172,
+ "grad_norm": 19044.880859375,
+ "learning_rate": 9.914166783341227e-05,
+ "loss": 0.5144,
+ "step": 22700
+ },
+ {
+ "epoch": 0.11737634208883455,
+ "grad_norm": 19291.37109375,
+ "learning_rate": 9.91349150704693e-05,
+ "loss": 0.5147,
+ "step": 22750
+ },
+ {
+ "epoch": 0.11763431207144737,
+ "grad_norm": 16757.376953125,
+ "learning_rate": 9.91281360802601e-05,
+ "loss": 0.5163,
+ "step": 22800
+ },
+ {
+ "epoch": 0.11789228205406019,
+ "grad_norm": 18870.287109375,
+ "learning_rate": 9.912133086640318e-05,
+ "loss": 0.512,
+ "step": 22850
+ },
+ {
+ "epoch": 0.11815025203667301,
+ "grad_norm": 20520.115234375,
+ "learning_rate": 9.911449943253102e-05,
+ "loss": 0.5175,
+ "step": 22900
+ },
+ {
+ "epoch": 0.11840822201928583,
+ "grad_norm": 20585.21484375,
+ "learning_rate": 9.910764178229011e-05,
+ "loss": 0.5114,
+ "step": 22950
+ },
+ {
+ "epoch": 0.11866619200189867,
+ "grad_norm": 18660.384765625,
+ "learning_rate": 9.910075791934092e-05,
+ "loss": 0.5115,
+ "step": 23000
+ },
+ {
+ "epoch": 0.11892416198451149,
+ "grad_norm": 19391.318359375,
+ "learning_rate": 9.909384784735794e-05,
+ "loss": 0.5198,
+ "step": 23050
+ },
+ {
+ "epoch": 0.11918213196712431,
+ "grad_norm": 18007.306640625,
+ "learning_rate": 9.908691157002962e-05,
+ "loss": 0.5125,
+ "step": 23100
+ },
+ {
+ "epoch": 0.11944010194973713,
+ "grad_norm": 20804.501953125,
+ "learning_rate": 9.907994909105842e-05,
+ "loss": 0.516,
+ "step": 23150
+ },
+ {
+ "epoch": 0.11969807193234995,
+ "grad_norm": 18307.63671875,
+ "learning_rate": 9.907296041416076e-05,
+ "loss": 0.5108,
+ "step": 23200
+ },
+ {
+ "epoch": 0.11995604191496277,
+ "grad_norm": 19694.552734375,
+ "learning_rate": 9.906594554306709e-05,
+ "loss": 0.5092,
+ "step": 23250
+ },
+ {
+ "epoch": 0.12021401189757559,
+ "grad_norm": 20234.0703125,
+ "learning_rate": 9.90589044815218e-05,
+ "loss": 0.515,
+ "step": 23300
+ },
+ {
+ "epoch": 0.12047198188018843,
+ "grad_norm": 18483.4296875,
+ "learning_rate": 9.905183723328327e-05,
+ "loss": 0.5127,
+ "step": 23350
+ },
+ {
+ "epoch": 0.12072995186280125,
+ "grad_norm": 17447.51953125,
+ "learning_rate": 9.904474380212384e-05,
+ "loss": 0.5107,
+ "step": 23400
+ },
+ {
+ "epoch": 0.12098792184541407,
+ "grad_norm": 18881.7109375,
+ "learning_rate": 9.903762419182986e-05,
+ "loss": 0.5177,
+ "step": 23450
+ },
+ {
+ "epoch": 0.12124589182802689,
+ "grad_norm": 17861.990234375,
+ "learning_rate": 9.903047840620168e-05,
+ "loss": 0.5128,
+ "step": 23500
+ },
+ {
+ "epoch": 0.12150386181063971,
+ "grad_norm": 19111.53515625,
+ "learning_rate": 9.902330644905351e-05,
+ "loss": 0.5134,
+ "step": 23550
+ },
+ {
+ "epoch": 0.12176183179325253,
+ "grad_norm": 18461.107421875,
+ "learning_rate": 9.901610832421366e-05,
+ "loss": 0.51,
+ "step": 23600
+ },
+ {
+ "epoch": 0.12201980177586536,
+ "grad_norm": 18103.701171875,
+ "learning_rate": 9.900888403552431e-05,
+ "loss": 0.5131,
+ "step": 23650
+ },
+ {
+ "epoch": 0.12227777175847819,
+ "grad_norm": 18334.755859375,
+ "learning_rate": 9.900163358684168e-05,
+ "loss": 0.511,
+ "step": 23700
+ },
+ {
+ "epoch": 0.12253574174109101,
+ "grad_norm": 17476.322265625,
+ "learning_rate": 9.89943569820359e-05,
+ "loss": 0.5151,
+ "step": 23750
+ },
+ {
+ "epoch": 0.12279371172370383,
+ "grad_norm": 18698.09765625,
+ "learning_rate": 9.898705422499107e-05,
+ "loss": 0.5146,
+ "step": 23800
+ },
+ {
+ "epoch": 0.12305168170631665,
+ "grad_norm": 18321.80859375,
+ "learning_rate": 9.897972531960528e-05,
+ "loss": 0.5109,
+ "step": 23850
+ },
+ {
+ "epoch": 0.12330965168892948,
+ "grad_norm": 18234.361328125,
+ "learning_rate": 9.897237026979056e-05,
+ "loss": 0.5115,
+ "step": 23900
+ },
+ {
+ "epoch": 0.1235676216715423,
+ "grad_norm": 19737.849609375,
+ "learning_rate": 9.896498907947287e-05,
+ "loss": 0.5155,
+ "step": 23950
+ },
+ {
+ "epoch": 0.12382559165415512,
+ "grad_norm": 19136.279296875,
+ "learning_rate": 9.895758175259218e-05,
+ "loss": 0.5162,
+ "step": 24000
+ },
+ {
+ "epoch": 0.12408356163676794,
+ "grad_norm": 18575.431640625,
+ "learning_rate": 9.895014829310235e-05,
+ "loss": 0.5141,
+ "step": 24050
+ },
+ {
+ "epoch": 0.12434153161938077,
+ "grad_norm": 17589.353515625,
+ "learning_rate": 9.894268870497121e-05,
+ "loss": 0.501,
+ "step": 24100
+ },
+ {
+ "epoch": 0.1245995016019936,
+ "grad_norm": 19781.830078125,
+ "learning_rate": 9.893520299218057e-05,
+ "loss": 0.5128,
+ "step": 24150
+ },
+ {
+ "epoch": 0.12485747158460642,
+ "grad_norm": 17501.150390625,
+ "learning_rate": 9.892769115872617e-05,
+ "loss": 0.5113,
+ "step": 24200
+ },
+ {
+ "epoch": 0.12511544156721924,
+ "grad_norm": 21107.34375,
+ "learning_rate": 9.892015320861762e-05,
+ "loss": 0.5041,
+ "step": 24250
+ },
+ {
+ "epoch": 0.12537341154983206,
+ "grad_norm": 17529.345703125,
+ "learning_rate": 9.89125891458786e-05,
+ "loss": 0.5093,
+ "step": 24300
+ },
+ {
+ "epoch": 0.12563138153244488,
+ "grad_norm": 18061.890625,
+ "learning_rate": 9.890499897454663e-05,
+ "loss": 0.5111,
+ "step": 24350
+ },
+ {
+ "epoch": 0.1258893515150577,
+ "grad_norm": 21213.177734375,
+ "learning_rate": 9.889738269867318e-05,
+ "loss": 0.5106,
+ "step": 24400
+ },
+ {
+ "epoch": 0.12614732149767052,
+ "grad_norm": 17838.625,
+ "learning_rate": 9.88897403223237e-05,
+ "loss": 0.5144,
+ "step": 24450
+ },
+ {
+ "epoch": 0.12640529148028334,
+ "grad_norm": 19047.787109375,
+ "learning_rate": 9.888207184957752e-05,
+ "loss": 0.5133,
+ "step": 24500
+ },
+ {
+ "epoch": 0.12666326146289617,
+ "grad_norm": 17355.26171875,
+ "learning_rate": 9.887437728452794e-05,
+ "loss": 0.5054,
+ "step": 24550
+ },
+ {
+ "epoch": 0.12692123144550901,
+ "grad_norm": 20496.369140625,
+ "learning_rate": 9.886665663128216e-05,
+ "loss": 0.51,
+ "step": 24600
+ },
+ {
+ "epoch": 0.12717920142812184,
+ "grad_norm": 19887.734375,
+ "learning_rate": 9.885890989396133e-05,
+ "loss": 0.5049,
+ "step": 24650
+ },
+ {
+ "epoch": 0.12743717141073466,
+ "grad_norm": 20027.69140625,
+ "learning_rate": 9.885113707670049e-05,
+ "loss": 0.5118,
+ "step": 24700
+ },
+ {
+ "epoch": 0.12769514139334748,
+ "grad_norm": 18888.92578125,
+ "learning_rate": 9.884333818364861e-05,
+ "loss": 0.5168,
+ "step": 24750
+ },
+ {
+ "epoch": 0.1279531113759603,
+ "grad_norm": 20906.673828125,
+ "learning_rate": 9.883551321896862e-05,
+ "loss": 0.5109,
+ "step": 24800
+ },
+ {
+ "epoch": 0.12821108135857312,
+ "grad_norm": 20228.833984375,
+ "learning_rate": 9.882766218683731e-05,
+ "loss": 0.5167,
+ "step": 24850
+ },
+ {
+ "epoch": 0.12846905134118594,
+ "grad_norm": 19832.4609375,
+ "learning_rate": 9.881978509144543e-05,
+ "loss": 0.5113,
+ "step": 24900
+ },
+ {
+ "epoch": 0.12872702132379876,
+ "grad_norm": 18049.193359375,
+ "learning_rate": 9.881188193699758e-05,
+ "loss": 0.5121,
+ "step": 24950
+ },
+ {
+ "epoch": 0.12898499130641158,
+ "grad_norm": 18765.033203125,
+ "learning_rate": 9.880395272771236e-05,
+ "loss": 0.5123,
+ "step": 25000
+ },
+ {
+ "epoch": 0.12898499130641158,
+ "eval_loss": 0.5013377666473389,
+ "eval_runtime": 3332.4061,
+ "eval_samples_per_second": 930.595,
+ "eval_steps_per_second": 1.818,
+ "step": 25000
+ },
+ {
+ "epoch": 0.1292429612890244,
+ "grad_norm": 18435.787109375,
+ "learning_rate": 9.879599746782221e-05,
+ "loss": 0.5096,
+ "step": 25050
+ },
+ {
+ "epoch": 0.12950093127163723,
+ "grad_norm": 18993.890625,
+ "learning_rate": 9.878801616157348e-05,
+ "loss": 0.5091,
+ "step": 25100
+ },
+ {
+ "epoch": 0.12975890125425005,
+ "grad_norm": 19766.783203125,
+ "learning_rate": 9.878000881322646e-05,
+ "loss": 0.5059,
+ "step": 25150
+ },
+ {
+ "epoch": 0.13001687123686287,
+ "grad_norm": 19316.537109375,
+ "learning_rate": 9.87719754270553e-05,
+ "loss": 0.5112,
+ "step": 25200
+ },
+ {
+ "epoch": 0.1302748412194757,
+ "grad_norm": 19288.64453125,
+ "learning_rate": 9.876391600734807e-05,
+ "loss": 0.5031,
+ "step": 25250
+ },
+ {
+ "epoch": 0.1305328112020885,
+ "grad_norm": 18962.7734375,
+ "learning_rate": 9.875583055840673e-05,
+ "loss": 0.5113,
+ "step": 25300
+ },
+ {
+ "epoch": 0.13079078118470136,
+ "grad_norm": 19399.21875,
+ "learning_rate": 9.874771908454714e-05,
+ "loss": 0.5177,
+ "step": 25350
+ },
+ {
+ "epoch": 0.13104875116731418,
+ "grad_norm": 20511.134765625,
+ "learning_rate": 9.873958159009904e-05,
+ "loss": 0.5049,
+ "step": 25400
+ },
+ {
+ "epoch": 0.131306721149927,
+ "grad_norm": 17669.00390625,
+ "learning_rate": 9.87314180794061e-05,
+ "loss": 0.5076,
+ "step": 25450
+ },
+ {
+ "epoch": 0.13156469113253982,
+ "grad_norm": 20254.75390625,
+ "learning_rate": 9.872322855682579e-05,
+ "loss": 0.5102,
+ "step": 25500
+ },
+ {
+ "epoch": 0.13182266111515265,
+ "grad_norm": 21859.880859375,
+ "learning_rate": 9.871501302672956e-05,
+ "loss": 0.5098,
+ "step": 25550
+ },
+ {
+ "epoch": 0.13208063109776547,
+ "grad_norm": 18794.90625,
+ "learning_rate": 9.870677149350268e-05,
+ "loss": 0.5078,
+ "step": 25600
+ },
+ {
+ "epoch": 0.1323386010803783,
+ "grad_norm": 19909.65625,
+ "learning_rate": 9.869850396154434e-05,
+ "loss": 0.5129,
+ "step": 25650
+ },
+ {
+ "epoch": 0.1325965710629911,
+ "grad_norm": 17887.99609375,
+ "learning_rate": 9.869021043526756e-05,
+ "loss": 0.508,
+ "step": 25700
+ },
+ {
+ "epoch": 0.13285454104560393,
+ "grad_norm": 17189.033203125,
+ "learning_rate": 9.868189091909929e-05,
+ "loss": 0.5114,
+ "step": 25750
+ },
+ {
+ "epoch": 0.13311251102821675,
+ "grad_norm": 21320.78125,
+ "learning_rate": 9.867354541748033e-05,
+ "loss": 0.5081,
+ "step": 25800
+ },
+ {
+ "epoch": 0.13337048101082957,
+ "grad_norm": 19035.33984375,
+ "learning_rate": 9.866517393486532e-05,
+ "loss": 0.5065,
+ "step": 25850
+ },
+ {
+ "epoch": 0.1336284509934424,
+ "grad_norm": 19038.876953125,
+ "learning_rate": 9.86567764757228e-05,
+ "loss": 0.5055,
+ "step": 25900
+ },
+ {
+ "epoch": 0.13388642097605521,
+ "grad_norm": 20425.6875,
+ "learning_rate": 9.86483530445352e-05,
+ "loss": 0.5091,
+ "step": 25950
+ },
+ {
+ "epoch": 0.13414439095866804,
+ "grad_norm": 19947.34765625,
+ "learning_rate": 9.863990364579876e-05,
+ "loss": 0.5062,
+ "step": 26000
+ },
+ {
+ "epoch": 0.13440236094128089,
+ "grad_norm": 18758.7890625,
+ "learning_rate": 9.863142828402361e-05,
+ "loss": 0.5099,
+ "step": 26050
+ },
+ {
+ "epoch": 0.1346603309238937,
+ "grad_norm": 18494.076171875,
+ "learning_rate": 9.862292696373372e-05,
+ "loss": 0.5043,
+ "step": 26100
+ },
+ {
+ "epoch": 0.13491830090650653,
+ "grad_norm": 19646.841796875,
+ "learning_rate": 9.861439968946696e-05,
+ "loss": 0.508,
+ "step": 26150
+ },
+ {
+ "epoch": 0.13517627088911935,
+ "grad_norm": 19356.009765625,
+ "learning_rate": 9.8605846465775e-05,
+ "loss": 0.5015,
+ "step": 26200
+ },
+ {
+ "epoch": 0.13543424087173217,
+ "grad_norm": 19243.1875,
+ "learning_rate": 9.859726729722341e-05,
+ "loss": 0.5086,
+ "step": 26250
+ },
+ {
+ "epoch": 0.135692210854345,
+ "grad_norm": 20116.43359375,
+ "learning_rate": 9.858866218839156e-05,
+ "loss": 0.5074,
+ "step": 26300
+ },
+ {
+ "epoch": 0.1359501808369578,
+ "grad_norm": 18592.1015625,
+ "learning_rate": 9.858003114387269e-05,
+ "loss": 0.5054,
+ "step": 26350
+ },
+ {
+ "epoch": 0.13620815081957063,
+ "grad_norm": 19552.505859375,
+ "learning_rate": 9.85713741682739e-05,
+ "loss": 0.5042,
+ "step": 26400
+ },
+ {
+ "epoch": 0.13646612080218345,
+ "grad_norm": 18818.142578125,
+ "learning_rate": 9.856269126621611e-05,
+ "loss": 0.5106,
+ "step": 26450
+ },
+ {
+ "epoch": 0.13672409078479628,
+ "grad_norm": 21973.685546875,
+ "learning_rate": 9.855398244233407e-05,
+ "loss": 0.5116,
+ "step": 26500
+ },
+ {
+ "epoch": 0.1369820607674091,
+ "grad_norm": 19296.7890625,
+ "learning_rate": 9.854524770127641e-05,
+ "loss": 0.5103,
+ "step": 26550
+ },
+ {
+ "epoch": 0.13724003075002192,
+ "grad_norm": 18975.22265625,
+ "learning_rate": 9.853648704770554e-05,
+ "loss": 0.5093,
+ "step": 26600
+ },
+ {
+ "epoch": 0.13749800073263474,
+ "grad_norm": 20003.19140625,
+ "learning_rate": 9.852770048629776e-05,
+ "loss": 0.5094,
+ "step": 26650
+ },
+ {
+ "epoch": 0.13775597071524756,
+ "grad_norm": 19885.341796875,
+ "learning_rate": 9.851888802174312e-05,
+ "loss": 0.502,
+ "step": 26700
+ },
+ {
+ "epoch": 0.1380139406978604,
+ "grad_norm": 18030.115234375,
+ "learning_rate": 9.851004965874557e-05,
+ "loss": 0.5045,
+ "step": 26750
+ },
+ {
+ "epoch": 0.13827191068047323,
+ "grad_norm": 19143.369140625,
+ "learning_rate": 9.850118540202286e-05,
+ "loss": 0.5068,
+ "step": 26800
+ },
+ {
+ "epoch": 0.13852988066308605,
+ "grad_norm": 18902.5390625,
+ "learning_rate": 9.849229525630656e-05,
+ "loss": 0.4984,
+ "step": 26850
+ },
+ {
+ "epoch": 0.13878785064569887,
+ "grad_norm": 18523.115234375,
+ "learning_rate": 9.848337922634206e-05,
+ "loss": 0.5099,
+ "step": 26900
+ },
+ {
+ "epoch": 0.1390458206283117,
+ "grad_norm": 19873.283203125,
+ "learning_rate": 9.847443731688852e-05,
+ "loss": 0.5039,
+ "step": 26950
+ },
+ {
+ "epoch": 0.13930379061092452,
+ "grad_norm": 20202.23046875,
+ "learning_rate": 9.846546953271902e-05,
+ "loss": 0.507,
+ "step": 27000
+ },
+ {
+ "epoch": 0.13956176059353734,
+ "grad_norm": 17484.572265625,
+ "learning_rate": 9.845647587862034e-05,
+ "loss": 0.5113,
+ "step": 27050
+ },
+ {
+ "epoch": 0.13981973057615016,
+ "grad_norm": 17931.634765625,
+ "learning_rate": 9.844745635939316e-05,
+ "loss": 0.5051,
+ "step": 27100
+ },
+ {
+ "epoch": 0.14007770055876298,
+ "grad_norm": 20536.693359375,
+ "learning_rate": 9.843841097985191e-05,
+ "loss": 0.5044,
+ "step": 27150
+ },
+ {
+ "epoch": 0.1403356705413758,
+ "grad_norm": 18379.619140625,
+ "learning_rate": 9.842933974482482e-05,
+ "loss": 0.5071,
+ "step": 27200
+ },
+ {
+ "epoch": 0.14059364052398862,
+ "grad_norm": 19097.240234375,
+ "learning_rate": 9.842024265915397e-05,
+ "loss": 0.5046,
+ "step": 27250
+ },
+ {
+ "epoch": 0.14085161050660144,
+ "grad_norm": 22569.80859375,
+ "learning_rate": 9.841111972769517e-05,
+ "loss": 0.5022,
+ "step": 27300
+ },
+ {
+ "epoch": 0.14110958048921426,
+ "grad_norm": 17499.166015625,
+ "learning_rate": 9.84019709553181e-05,
+ "loss": 0.5014,
+ "step": 27350
+ },
+ {
+ "epoch": 0.14136755047182709,
+ "grad_norm": 20447.25,
+ "learning_rate": 9.839279634690619e-05,
+ "loss": 0.5065,
+ "step": 27400
+ },
+ {
+ "epoch": 0.14162552045443993,
+ "grad_norm": 20977.70703125,
+ "learning_rate": 9.838359590735665e-05,
+ "loss": 0.5042,
+ "step": 27450
+ },
+ {
+ "epoch": 0.14188349043705276,
+ "grad_norm": 18168.962890625,
+ "learning_rate": 9.83743696415805e-05,
+ "loss": 0.5043,
+ "step": 27500
+ },
+ {
+ "epoch": 0.14214146041966558,
+ "grad_norm": 18671.841796875,
+ "learning_rate": 9.836511755450256e-05,
+ "loss": 0.5054,
+ "step": 27550
+ },
+ {
+ "epoch": 0.1423994304022784,
+ "grad_norm": 17737.90625,
+ "learning_rate": 9.835583965106141e-05,
+ "loss": 0.507,
+ "step": 27600
+ },
+ {
+ "epoch": 0.14265740038489122,
+ "grad_norm": 23218.873046875,
+ "learning_rate": 9.834653593620939e-05,
+ "loss": 0.5055,
+ "step": 27650
+ },
+ {
+ "epoch": 0.14291537036750404,
+ "grad_norm": 20013.341796875,
+ "learning_rate": 9.833720641491269e-05,
+ "loss": 0.5008,
+ "step": 27700
+ },
+ {
+ "epoch": 0.14317334035011686,
+ "grad_norm": 21755.08203125,
+ "learning_rate": 9.832785109215119e-05,
+ "loss": 0.5029,
+ "step": 27750
+ },
+ {
+ "epoch": 0.14343131033272968,
+ "grad_norm": 18450.541015625,
+ "learning_rate": 9.831846997291859e-05,
+ "loss": 0.5086,
+ "step": 27800
+ },
+ {
+ "epoch": 0.1436892803153425,
+ "grad_norm": 17578.990234375,
+ "learning_rate": 9.830906306222235e-05,
+ "loss": 0.498,
+ "step": 27850
+ },
+ {
+ "epoch": 0.14394725029795533,
+ "grad_norm": 18771.2578125,
+ "learning_rate": 9.82996303650837e-05,
+ "loss": 0.5006,
+ "step": 27900
+ },
+ {
+ "epoch": 0.14420522028056815,
+ "grad_norm": 19841.912109375,
+ "learning_rate": 9.829017188653763e-05,
+ "loss": 0.5003,
+ "step": 27950
+ },
+ {
+ "epoch": 0.14446319026318097,
+ "grad_norm": 19089.384765625,
+ "learning_rate": 9.82806876316329e-05,
+ "loss": 0.5028,
+ "step": 28000
+ },
+ {
+ "epoch": 0.1447211602457938,
+ "grad_norm": 17971.998046875,
+ "learning_rate": 9.827117760543198e-05,
+ "loss": 0.5103,
+ "step": 28050
+ },
+ {
+ "epoch": 0.1449791302284066,
+ "grad_norm": 19590.46875,
+ "learning_rate": 9.826164181301121e-05,
+ "loss": 0.5075,
+ "step": 28100
+ },
+ {
+ "epoch": 0.14523710021101943,
+ "grad_norm": 19316.150390625,
+ "learning_rate": 9.825208025946056e-05,
+ "loss": 0.4971,
+ "step": 28150
+ },
+ {
+ "epoch": 0.14549507019363228,
+ "grad_norm": 19814.3125,
+ "learning_rate": 9.82424929498838e-05,
+ "loss": 0.501,
+ "step": 28200
+ },
+ {
+ "epoch": 0.1457530401762451,
+ "grad_norm": 18669.203125,
+ "learning_rate": 9.823287988939847e-05,
+ "loss": 0.5027,
+ "step": 28250
+ },
+ {
+ "epoch": 0.14601101015885792,
+ "grad_norm": 20375.48828125,
+ "learning_rate": 9.822324108313585e-05,
+ "loss": 0.4966,
+ "step": 28300
+ },
+ {
+ "epoch": 0.14626898014147074,
+ "grad_norm": 19665.4296875,
+ "learning_rate": 9.82135765362409e-05,
+ "loss": 0.4966,
+ "step": 28350
+ },
+ {
+ "epoch": 0.14652695012408357,
+ "grad_norm": 19579.771484375,
+ "learning_rate": 9.820388625387242e-05,
+ "loss": 0.5028,
+ "step": 28400
+ },
+ {
+ "epoch": 0.1467849201066964,
+ "grad_norm": 20270.564453125,
+ "learning_rate": 9.819417024120285e-05,
+ "loss": 0.4972,
+ "step": 28450
+ },
+ {
+ "epoch": 0.1470428900893092,
+ "grad_norm": 20025.6328125,
+ "learning_rate": 9.818442850341845e-05,
+ "loss": 0.5082,
+ "step": 28500
+ },
+ {
+ "epoch": 0.14730086007192203,
+ "grad_norm": 19062.525390625,
+ "learning_rate": 9.817466104571915e-05,
+ "loss": 0.4983,
+ "step": 28550
+ },
+ {
+ "epoch": 0.14755883005453485,
+ "grad_norm": 18558.0390625,
+ "learning_rate": 9.816486787331862e-05,
+ "loss": 0.5004,
+ "step": 28600
+ },
+ {
+ "epoch": 0.14781680003714767,
+ "grad_norm": 20880.6875,
+ "learning_rate": 9.815504899144428e-05,
+ "loss": 0.5036,
+ "step": 28650
+ },
+ {
+ "epoch": 0.1480747700197605,
+ "grad_norm": 19120.3359375,
+ "learning_rate": 9.814520440533726e-05,
+ "loss": 0.5004,
+ "step": 28700
+ },
+ {
+ "epoch": 0.14833274000237331,
+ "grad_norm": 17185.451171875,
+ "learning_rate": 9.813533412025242e-05,
+ "loss": 0.5047,
+ "step": 28750
+ },
+ {
+ "epoch": 0.14859070998498614,
+ "grad_norm": 21795.697265625,
+ "learning_rate": 9.81254381414583e-05,
+ "loss": 0.5033,
+ "step": 28800
+ },
+ {
+ "epoch": 0.14884867996759896,
+ "grad_norm": 21923.44140625,
+ "learning_rate": 9.811551647423718e-05,
+ "loss": 0.4957,
+ "step": 28850
+ },
+ {
+ "epoch": 0.1491066499502118,
+ "grad_norm": 18988.30078125,
+ "learning_rate": 9.810556912388509e-05,
+ "loss": 0.4979,
+ "step": 28900
+ },
+ {
+ "epoch": 0.14936461993282463,
+ "grad_norm": 18479.74609375,
+ "learning_rate": 9.809559609571169e-05,
+ "loss": 0.5003,
+ "step": 28950
+ },
+ {
+ "epoch": 0.14962258991543745,
+ "grad_norm": 20426.57421875,
+ "learning_rate": 9.808559739504043e-05,
+ "loss": 0.5019,
+ "step": 29000
+ },
+ {
+ "epoch": 0.14988055989805027,
+ "grad_norm": 20044.365234375,
+ "learning_rate": 9.80755730272084e-05,
+ "loss": 0.5012,
+ "step": 29050
+ },
+ {
+ "epoch": 0.1501385298806631,
+ "grad_norm": 18321.439453125,
+ "learning_rate": 9.806552299756641e-05,
+ "loss": 0.4918,
+ "step": 29100
+ },
+ {
+ "epoch": 0.1503964998632759,
+ "grad_norm": 20315.681640625,
+ "learning_rate": 9.805544731147899e-05,
+ "loss": 0.5015,
+ "step": 29150
+ },
+ {
+ "epoch": 0.15065446984588873,
+ "grad_norm": 20399.990234375,
+ "learning_rate": 9.804534597432432e-05,
+ "loss": 0.4967,
+ "step": 29200
+ },
+ {
+ "epoch": 0.15091243982850155,
+ "grad_norm": 19539.91796875,
+ "learning_rate": 9.803521899149432e-05,
+ "loss": 0.5002,
+ "step": 29250
+ },
+ {
+ "epoch": 0.15117040981111438,
+ "grad_norm": 20317.970703125,
+ "learning_rate": 9.802506636839457e-05,
+ "loss": 0.4988,
+ "step": 29300
+ },
+ {
+ "epoch": 0.1514283797937272,
+ "grad_norm": 18728.32421875,
+ "learning_rate": 9.801488811044434e-05,
+ "loss": 0.5007,
+ "step": 29350
+ },
+ {
+ "epoch": 0.15168634977634002,
+ "grad_norm": 21256.51171875,
+ "learning_rate": 9.80046842230766e-05,
+ "loss": 0.5066,
+ "step": 29400
+ },
+ {
+ "epoch": 0.15194431975895284,
+ "grad_norm": 18871.8828125,
+ "learning_rate": 9.799445471173799e-05,
+ "loss": 0.502,
+ "step": 29450
+ },
+ {
+ "epoch": 0.15220228974156566,
+ "grad_norm": 18434.251953125,
+ "learning_rate": 9.798419958188878e-05,
+ "loss": 0.5018,
+ "step": 29500
+ },
+ {
+ "epoch": 0.15246025972417848,
+ "grad_norm": 18562.412109375,
+ "learning_rate": 9.7973918839003e-05,
+ "loss": 0.4978,
+ "step": 29550
+ },
+ {
+ "epoch": 0.15271822970679133,
+ "grad_norm": 20020.7890625,
+ "learning_rate": 9.796361248856832e-05,
+ "loss": 0.4989,
+ "step": 29600
+ },
+ {
+ "epoch": 0.15297619968940415,
+ "grad_norm": 20026.6015625,
+ "learning_rate": 9.795328053608606e-05,
+ "loss": 0.5002,
+ "step": 29650
+ },
+ {
+ "epoch": 0.15323416967201697,
+ "grad_norm": 20098.703125,
+ "learning_rate": 9.794292298707119e-05,
+ "loss": 0.4938,
+ "step": 29700
+ },
+ {
+ "epoch": 0.1534921396546298,
+ "grad_norm": 18960.154296875,
+ "learning_rate": 9.793253984705239e-05,
+ "loss": 0.4956,
+ "step": 29750
+ },
+ {
+ "epoch": 0.15375010963724262,
+ "grad_norm": 20478.669921875,
+ "learning_rate": 9.7922131121572e-05,
+ "loss": 0.4998,
+ "step": 29800
+ },
+ {
+ "epoch": 0.15400807961985544,
+ "grad_norm": 20406.701171875,
+ "learning_rate": 9.791169681618596e-05,
+ "loss": 0.5083,
+ "step": 29850
+ },
+ {
+ "epoch": 0.15426604960246826,
+ "grad_norm": 17598.75390625,
+ "learning_rate": 9.790123693646391e-05,
+ "loss": 0.4968,
+ "step": 29900
+ },
+ {
+ "epoch": 0.15452401958508108,
+ "grad_norm": 19622.521484375,
+ "learning_rate": 9.789075148798915e-05,
+ "loss": 0.4881,
+ "step": 29950
+ },
+ {
+ "epoch": 0.1547819895676939,
+ "grad_norm": 20092.87109375,
+ "learning_rate": 9.78802404763586e-05,
+ "loss": 0.4994,
+ "step": 30000
+ },
+ {
+ "epoch": 0.1547819895676939,
+ "eval_loss": 0.4904574453830719,
+ "eval_runtime": 3267.2095,
+ "eval_samples_per_second": 949.165,
+ "eval_steps_per_second": 1.854,
+ "step": 30000
+ },
+ {
+ "epoch": 0.15503995955030672,
+ "grad_norm": 19136.84765625,
+ "learning_rate": 9.786970390718282e-05,
+ "loss": 0.4963,
+ "step": 30050
+ },
+ {
+ "epoch": 0.15529792953291954,
+ "grad_norm": 20464.998046875,
+ "learning_rate": 9.785914178608603e-05,
+ "loss": 0.4994,
+ "step": 30100
+ },
+ {
+ "epoch": 0.15555589951553236,
+ "grad_norm": 23388.55859375,
+ "learning_rate": 9.784855411870611e-05,
+ "loss": 0.5036,
+ "step": 30150
+ },
+ {
+ "epoch": 0.15581386949814519,
+ "grad_norm": 20002.095703125,
+ "learning_rate": 9.783794091069451e-05,
+ "loss": 0.5021,
+ "step": 30200
+ },
+ {
+ "epoch": 0.156071839480758,
+ "grad_norm": 19565.419921875,
+ "learning_rate": 9.782730216771641e-05,
+ "loss": 0.4929,
+ "step": 30250
+ },
+ {
+ "epoch": 0.15632980946337086,
+ "grad_norm": 20284.173828125,
+ "learning_rate": 9.781663789545052e-05,
+ "loss": 0.4889,
+ "step": 30300
+ },
+ {
+ "epoch": 0.15658777944598368,
+ "grad_norm": 18613.439453125,
+ "learning_rate": 9.780594809958922e-05,
+ "loss": 0.496,
+ "step": 30350
+ },
+ {
+ "epoch": 0.1568457494285965,
+ "grad_norm": 19419.1640625,
+ "learning_rate": 9.779523278583855e-05,
+ "loss": 0.4977,
+ "step": 30400
+ },
+ {
+ "epoch": 0.15710371941120932,
+ "grad_norm": 21695.361328125,
+ "learning_rate": 9.778449195991813e-05,
+ "loss": 0.4998,
+ "step": 30450
+ },
+ {
+ "epoch": 0.15736168939382214,
+ "grad_norm": 21914.3828125,
+ "learning_rate": 9.777372562756117e-05,
+ "loss": 0.4936,
+ "step": 30500
+ },
+ {
+ "epoch": 0.15761965937643496,
+ "grad_norm": 22384.525390625,
+ "learning_rate": 9.776293379451458e-05,
+ "loss": 0.5034,
+ "step": 30550
+ },
+ {
+ "epoch": 0.15787762935904778,
+ "grad_norm": 21174.220703125,
+ "learning_rate": 9.775211646653879e-05,
+ "loss": 0.4928,
+ "step": 30600
+ },
+ {
+ "epoch": 0.1581355993416606,
+ "grad_norm": 19809.953125,
+ "learning_rate": 9.77412736494079e-05,
+ "loss": 0.5014,
+ "step": 30650
+ },
+ {
+ "epoch": 0.15839356932427343,
+ "grad_norm": 19657.048828125,
+ "learning_rate": 9.773040534890958e-05,
+ "loss": 0.5022,
+ "step": 30700
+ },
+ {
+ "epoch": 0.15865153930688625,
+ "grad_norm": 20559.490234375,
+ "learning_rate": 9.771951157084514e-05,
+ "loss": 0.4923,
+ "step": 30750
+ },
+ {
+ "epoch": 0.15890950928949907,
+ "grad_norm": 19473.294921875,
+ "learning_rate": 9.770859232102946e-05,
+ "loss": 0.4991,
+ "step": 30800
+ },
+ {
+ "epoch": 0.1591674792721119,
+ "grad_norm": 19243.509765625,
+ "learning_rate": 9.769764760529102e-05,
+ "loss": 0.4934,
+ "step": 30850
+ },
+ {
+ "epoch": 0.1594254492547247,
+ "grad_norm": 20882.853515625,
+ "learning_rate": 9.768667742947189e-05,
+ "loss": 0.4989,
+ "step": 30900
+ },
+ {
+ "epoch": 0.15968341923733753,
+ "grad_norm": 19654.17578125,
+ "learning_rate": 9.767568179942776e-05,
+ "loss": 0.501,
+ "step": 30950
+ },
+ {
+ "epoch": 0.15994138921995035,
+ "grad_norm": 20069.412109375,
+ "learning_rate": 9.766466072102786e-05,
+ "loss": 0.5001,
+ "step": 31000
+ },
+ {
+ "epoch": 0.1601993592025632,
+ "grad_norm": 19730.416015625,
+ "learning_rate": 9.765361420015506e-05,
+ "loss": 0.4947,
+ "step": 31050
+ },
+ {
+ "epoch": 0.16045732918517602,
+ "grad_norm": 19825.43359375,
+ "learning_rate": 9.764254224270573e-05,
+ "loss": 0.5012,
+ "step": 31100
+ },
+ {
+ "epoch": 0.16071529916778884,
+ "grad_norm": 19111.859375,
+ "learning_rate": 9.763144485458992e-05,
+ "loss": 0.4946,
+ "step": 31150
+ },
+ {
+ "epoch": 0.16097326915040167,
+ "grad_norm": 20071.7578125,
+ "learning_rate": 9.762032204173116e-05,
+ "loss": 0.4961,
+ "step": 31200
+ },
+ {
+ "epoch": 0.1612312391330145,
+ "grad_norm": 18780.638671875,
+ "learning_rate": 9.76091738100666e-05,
+ "loss": 0.4952,
+ "step": 31250
+ },
+ {
+ "epoch": 0.1614892091156273,
+ "grad_norm": 20192.69140625,
+ "learning_rate": 9.759800016554699e-05,
+ "loss": 0.4919,
+ "step": 31300
+ },
+ {
+ "epoch": 0.16174717909824013,
+ "grad_norm": 18430.57421875,
+ "learning_rate": 9.758680111413653e-05,
+ "loss": 0.4953,
+ "step": 31350
+ },
+ {
+ "epoch": 0.16200514908085295,
+ "grad_norm": 18921.740234375,
+ "learning_rate": 9.757557666181314e-05,
+ "loss": 0.5013,
+ "step": 31400
+ },
+ {
+ "epoch": 0.16226311906346577,
+ "grad_norm": 18918.857421875,
+ "learning_rate": 9.756432681456815e-05,
+ "loss": 0.4976,
+ "step": 31450
+ },
+ {
+ "epoch": 0.1625210890460786,
+ "grad_norm": 21373.814453125,
+ "learning_rate": 9.755305157840655e-05,
+ "loss": 0.4975,
+ "step": 31500
+ },
+ {
+ "epoch": 0.16277905902869141,
+ "grad_norm": 19509.482421875,
+ "learning_rate": 9.754175095934684e-05,
+ "loss": 0.4966,
+ "step": 31550
+ },
+ {
+ "epoch": 0.16303702901130424,
+ "grad_norm": 18362.125,
+ "learning_rate": 9.753042496342103e-05,
+ "loss": 0.505,
+ "step": 31600
+ },
+ {
+ "epoch": 0.16329499899391706,
+ "grad_norm": 20344.11328125,
+ "learning_rate": 9.751907359667476e-05,
+ "loss": 0.4988,
+ "step": 31650
+ },
+ {
+ "epoch": 0.16355296897652988,
+ "grad_norm": 21398.97265625,
+ "learning_rate": 9.750769686516715e-05,
+ "loss": 0.493,
+ "step": 31700
+ },
+ {
+ "epoch": 0.16381093895914273,
+ "grad_norm": 21106.955078125,
+ "learning_rate": 9.74962947749709e-05,
+ "loss": 0.4999,
+ "step": 31750
+ },
+ {
+ "epoch": 0.16406890894175555,
+ "grad_norm": 19787.216796875,
+ "learning_rate": 9.74848673321722e-05,
+ "loss": 0.4932,
+ "step": 31800
+ },
+ {
+ "epoch": 0.16432687892436837,
+ "grad_norm": 19198.83984375,
+ "learning_rate": 9.747341454287082e-05,
+ "loss": 0.4919,
+ "step": 31850
+ },
+ {
+ "epoch": 0.1645848489069812,
+ "grad_norm": 18460.92578125,
+ "learning_rate": 9.746193641318002e-05,
+ "loss": 0.495,
+ "step": 31900
+ },
+ {
+ "epoch": 0.164842818889594,
+ "grad_norm": 18591.427734375,
+ "learning_rate": 9.74504329492266e-05,
+ "loss": 0.4888,
+ "step": 31950
+ },
+ {
+ "epoch": 0.16510078887220683,
+ "grad_norm": 21651.3515625,
+ "learning_rate": 9.743890415715091e-05,
+ "loss": 0.4909,
+ "step": 32000
+ },
+ {
+ "epoch": 0.16535875885481965,
+ "grad_norm": 18884.486328125,
+ "learning_rate": 9.742735004310677e-05,
+ "loss": 0.4981,
+ "step": 32050
+ },
+ {
+ "epoch": 0.16561672883743248,
+ "grad_norm": 19223.658203125,
+ "learning_rate": 9.741577061326157e-05,
+ "loss": 0.4961,
+ "step": 32100
+ },
+ {
+ "epoch": 0.1658746988200453,
+ "grad_norm": 18266.560546875,
+ "learning_rate": 9.740416587379615e-05,
+ "loss": 0.4914,
+ "step": 32150
+ },
+ {
+ "epoch": 0.16613266880265812,
+ "grad_norm": 19871.509765625,
+ "learning_rate": 9.739253583090493e-05,
+ "loss": 0.499,
+ "step": 32200
+ },
+ {
+ "epoch": 0.16639063878527094,
+ "grad_norm": 19524.298828125,
+ "learning_rate": 9.738088049079577e-05,
+ "loss": 0.4944,
+ "step": 32250
+ },
+ {
+ "epoch": 0.16664860876788376,
+ "grad_norm": 20308.685546875,
+ "learning_rate": 9.73691998596901e-05,
+ "loss": 0.4941,
+ "step": 32300
+ },
+ {
+ "epoch": 0.16690657875049658,
+ "grad_norm": 19125.52734375,
+ "learning_rate": 9.735749394382278e-05,
+ "loss": 0.4968,
+ "step": 32350
+ },
+ {
+ "epoch": 0.1671645487331094,
+ "grad_norm": 18792.716796875,
+ "learning_rate": 9.734576274944223e-05,
+ "loss": 0.4959,
+ "step": 32400
+ },
+ {
+ "epoch": 0.16742251871572225,
+ "grad_norm": 18521.54296875,
+ "learning_rate": 9.73340062828103e-05,
+ "loss": 0.4913,
+ "step": 32450
+ },
+ {
+ "epoch": 0.16768048869833507,
+ "grad_norm": 19540.41796875,
+ "learning_rate": 9.732222455020241e-05,
+ "loss": 0.4999,
+ "step": 32500
+ },
+ {
+ "epoch": 0.1679384586809479,
+ "grad_norm": 18682.84375,
+ "learning_rate": 9.73104175579074e-05,
+ "loss": 0.4991,
+ "step": 32550
+ },
+ {
+ "epoch": 0.16819642866356072,
+ "grad_norm": 20134.8984375,
+ "learning_rate": 9.72985853122276e-05,
+ "loss": 0.4839,
+ "step": 32600
+ },
+ {
+ "epoch": 0.16845439864617354,
+ "grad_norm": 20375.1484375,
+ "learning_rate": 9.728672781947883e-05,
+ "loss": 0.4941,
+ "step": 32650
+ },
+ {
+ "epoch": 0.16871236862878636,
+ "grad_norm": 19720.98046875,
+ "learning_rate": 9.727484508599042e-05,
+ "loss": 0.4856,
+ "step": 32700
+ },
+ {
+ "epoch": 0.16897033861139918,
+ "grad_norm": 19408.7734375,
+ "learning_rate": 9.726293711810513e-05,
+ "loss": 0.4942,
+ "step": 32750
+ },
+ {
+ "epoch": 0.169228308594012,
+ "grad_norm": 20136.892578125,
+ "learning_rate": 9.725100392217919e-05,
+ "loss": 0.4942,
+ "step": 32800
+ },
+ {
+ "epoch": 0.16948627857662482,
+ "grad_norm": 20555.27734375,
+ "learning_rate": 9.723904550458232e-05,
+ "loss": 0.4907,
+ "step": 32850
+ },
+ {
+ "epoch": 0.16974424855923764,
+ "grad_norm": 18876.787109375,
+ "learning_rate": 9.722706187169769e-05,
+ "loss": 0.4951,
+ "step": 32900
+ },
+ {
+ "epoch": 0.17000221854185046,
+ "grad_norm": 19918.4296875,
+ "learning_rate": 9.721505302992194e-05,
+ "loss": 0.4871,
+ "step": 32950
+ },
+ {
+ "epoch": 0.17026018852446329,
+ "grad_norm": 18593.453125,
+ "learning_rate": 9.720301898566513e-05,
+ "loss": 0.4889,
+ "step": 33000
+ },
+ {
+ "epoch": 0.1705181585070761,
+ "grad_norm": 21007.5625,
+ "learning_rate": 9.719095974535084e-05,
+ "loss": 0.4936,
+ "step": 33050
+ },
+ {
+ "epoch": 0.17077612848968893,
+ "grad_norm": 21749.849609375,
+ "learning_rate": 9.717887531541601e-05,
+ "loss": 0.4915,
+ "step": 33100
+ },
+ {
+ "epoch": 0.17103409847230178,
+ "grad_norm": 19097.896484375,
+ "learning_rate": 9.716676570231114e-05,
+ "loss": 0.4857,
+ "step": 33150
+ },
+ {
+ "epoch": 0.1712920684549146,
+ "grad_norm": 18509.107421875,
+ "learning_rate": 9.715463091250003e-05,
+ "loss": 0.487,
+ "step": 33200
+ },
+ {
+ "epoch": 0.17155003843752742,
+ "grad_norm": 21414.916015625,
+ "learning_rate": 9.714247095246007e-05,
+ "loss": 0.4929,
+ "step": 33250
+ },
+ {
+ "epoch": 0.17180800842014024,
+ "grad_norm": 19836.978515625,
+ "learning_rate": 9.713028582868196e-05,
+ "loss": 0.4948,
+ "step": 33300
+ },
+ {
+ "epoch": 0.17206597840275306,
+ "grad_norm": 18013.787109375,
+ "learning_rate": 9.71180755476699e-05,
+ "loss": 0.4945,
+ "step": 33350
+ },
+ {
+ "epoch": 0.17232394838536588,
+ "grad_norm": 18498.1640625,
+ "learning_rate": 9.71058401159415e-05,
+ "loss": 0.4961,
+ "step": 33400
+ },
+ {
+ "epoch": 0.1725819183679787,
+ "grad_norm": 19871.404296875,
+ "learning_rate": 9.709357954002778e-05,
+ "loss": 0.4896,
+ "step": 33450
+ },
+ {
+ "epoch": 0.17283988835059153,
+ "grad_norm": 20794.05859375,
+ "learning_rate": 9.708129382647324e-05,
+ "loss": 0.4855,
+ "step": 33500
+ },
+ {
+ "epoch": 0.17309785833320435,
+ "grad_norm": 19775.6328125,
+ "learning_rate": 9.706898298183573e-05,
+ "loss": 0.4899,
+ "step": 33550
+ },
+ {
+ "epoch": 0.17335582831581717,
+ "grad_norm": 24329.740234375,
+ "learning_rate": 9.705664701268652e-05,
+ "loss": 0.4879,
+ "step": 33600
+ },
+ {
+ "epoch": 0.17361379829843,
+ "grad_norm": 19666.697265625,
+ "learning_rate": 9.704428592561037e-05,
+ "loss": 0.493,
+ "step": 33650
+ },
+ {
+ "epoch": 0.1738717682810428,
+ "grad_norm": 20382.115234375,
+ "learning_rate": 9.703189972720532e-05,
+ "loss": 0.4922,
+ "step": 33700
+ },
+ {
+ "epoch": 0.17412973826365563,
+ "grad_norm": 20240.46875,
+ "learning_rate": 9.701948842408293e-05,
+ "loss": 0.4908,
+ "step": 33750
+ },
+ {
+ "epoch": 0.17438770824626845,
+ "grad_norm": 18531.224609375,
+ "learning_rate": 9.700705202286811e-05,
+ "loss": 0.489,
+ "step": 33800
+ },
+ {
+ "epoch": 0.17464567822888127,
+ "grad_norm": 19121.0625,
+ "learning_rate": 9.699459053019912e-05,
+ "loss": 0.4884,
+ "step": 33850
+ },
+ {
+ "epoch": 0.17490364821149412,
+ "grad_norm": 19959.931640625,
+ "learning_rate": 9.698210395272773e-05,
+ "loss": 0.4912,
+ "step": 33900
+ },
+ {
+ "epoch": 0.17516161819410694,
+ "grad_norm": 18255.732421875,
+ "learning_rate": 9.696959229711901e-05,
+ "loss": 0.4888,
+ "step": 33950
+ },
+ {
+ "epoch": 0.17541958817671977,
+ "grad_norm": 21808.8671875,
+ "learning_rate": 9.695705557005142e-05,
+ "loss": 0.4945,
+ "step": 34000
+ },
+ {
+ "epoch": 0.1756775581593326,
+ "grad_norm": 18687.521484375,
+ "learning_rate": 9.694449377821685e-05,
+ "loss": 0.4891,
+ "step": 34050
+ },
+ {
+ "epoch": 0.1759355281419454,
+ "grad_norm": 18309.859375,
+ "learning_rate": 9.693190692832053e-05,
+ "loss": 0.4888,
+ "step": 34100
+ },
+ {
+ "epoch": 0.17619349812455823,
+ "grad_norm": 19453.705078125,
+ "learning_rate": 9.691929502708106e-05,
+ "loss": 0.4852,
+ "step": 34150
+ },
+ {
+ "epoch": 0.17645146810717105,
+ "grad_norm": 20964.595703125,
+ "learning_rate": 9.690665808123046e-05,
+ "loss": 0.4931,
+ "step": 34200
+ },
+ {
+ "epoch": 0.17670943808978387,
+ "grad_norm": 20170.5703125,
+ "learning_rate": 9.689399609751405e-05,
+ "loss": 0.4908,
+ "step": 34250
+ },
+ {
+ "epoch": 0.1769674080723967,
+ "grad_norm": 18276.19140625,
+ "learning_rate": 9.688130908269058e-05,
+ "loss": 0.4906,
+ "step": 34300
+ },
+ {
+ "epoch": 0.1772253780550095,
+ "grad_norm": 21062.56640625,
+ "learning_rate": 9.686859704353212e-05,
+ "loss": 0.4911,
+ "step": 34350
+ },
+ {
+ "epoch": 0.17748334803762233,
+ "grad_norm": 21678.6796875,
+ "learning_rate": 9.685585998682414e-05,
+ "loss": 0.4894,
+ "step": 34400
+ },
+ {
+ "epoch": 0.17774131802023516,
+ "grad_norm": 17795.384765625,
+ "learning_rate": 9.684309791936539e-05,
+ "loss": 0.4893,
+ "step": 34450
+ },
+ {
+ "epoch": 0.17799928800284798,
+ "grad_norm": 21536.837890625,
+ "learning_rate": 9.683031084796803e-05,
+ "loss": 0.4889,
+ "step": 34500
+ },
+ {
+ "epoch": 0.1782572579854608,
+ "grad_norm": 20554.423828125,
+ "learning_rate": 9.681749877945756e-05,
+ "loss": 0.4843,
+ "step": 34550
+ },
+ {
+ "epoch": 0.17851522796807365,
+ "grad_norm": 22045.376953125,
+ "learning_rate": 9.680466172067282e-05,
+ "loss": 0.4895,
+ "step": 34600
+ },
+ {
+ "epoch": 0.17877319795068647,
+ "grad_norm": 21406.853515625,
+ "learning_rate": 9.679179967846597e-05,
+ "loss": 0.4914,
+ "step": 34650
+ },
+ {
+ "epoch": 0.1790311679332993,
+ "grad_norm": 20971.037109375,
+ "learning_rate": 9.677891265970252e-05,
+ "loss": 0.485,
+ "step": 34700
+ },
+ {
+ "epoch": 0.1792891379159121,
+ "grad_norm": 20256.73828125,
+ "learning_rate": 9.676600067126129e-05,
+ "loss": 0.4918,
+ "step": 34750
+ },
+ {
+ "epoch": 0.17954710789852493,
+ "grad_norm": 19123.048828125,
+ "learning_rate": 9.67530637200345e-05,
+ "loss": 0.49,
+ "step": 34800
+ },
+ {
+ "epoch": 0.17980507788113775,
+ "grad_norm": 20799.748046875,
+ "learning_rate": 9.674010181292761e-05,
+ "loss": 0.4889,
+ "step": 34850
+ },
+ {
+ "epoch": 0.18006304786375057,
+ "grad_norm": 19569.609375,
+ "learning_rate": 9.672711495685945e-05,
+ "loss": 0.4882,
+ "step": 34900
+ },
+ {
+ "epoch": 0.1803210178463634,
+ "grad_norm": 18339.76171875,
+ "learning_rate": 9.671410315876213e-05,
+ "loss": 0.4884,
+ "step": 34950
+ },
+ {
+ "epoch": 0.18057898782897622,
+ "grad_norm": 20066.099609375,
+ "learning_rate": 9.670106642558111e-05,
+ "loss": 0.4866,
+ "step": 35000
+ },
+ {
+ "epoch": 0.18057898782897622,
+ "eval_loss": 0.48020538687705994,
+ "eval_runtime": 3265.3619,
+ "eval_samples_per_second": 949.702,
+ "eval_steps_per_second": 1.855,
+ "step": 35000
+ },
+ {
+ "epoch": 0.18083695781158904,
+ "grad_norm": 18703.037109375,
+ "learning_rate": 9.668800476427515e-05,
+ "loss": 0.4953,
+ "step": 35050
+ },
+ {
+ "epoch": 0.18109492779420186,
+ "grad_norm": 19886.177734375,
+ "learning_rate": 9.667491818181631e-05,
+ "loss": 0.4845,
+ "step": 35100
+ },
+ {
+ "epoch": 0.18135289777681468,
+ "grad_norm": 19349.08203125,
+ "learning_rate": 9.666180668518993e-05,
+ "loss": 0.493,
+ "step": 35150
+ },
+ {
+ "epoch": 0.1816108677594275,
+ "grad_norm": 19786.404296875,
+ "learning_rate": 9.664867028139473e-05,
+ "loss": 0.4815,
+ "step": 35200
+ },
+ {
+ "epoch": 0.18186883774204032,
+ "grad_norm": 21271.05859375,
+ "learning_rate": 9.66355089774426e-05,
+ "loss": 0.4907,
+ "step": 35250
+ },
+ {
+ "epoch": 0.18212680772465317,
+ "grad_norm": 19096.3125,
+ "learning_rate": 9.662232278035885e-05,
+ "loss": 0.4865,
+ "step": 35300
+ },
+ {
+ "epoch": 0.182384777707266,
+ "grad_norm": 20136.935546875,
+ "learning_rate": 9.660911169718196e-05,
+ "loss": 0.4824,
+ "step": 35350
+ },
+ {
+ "epoch": 0.18264274768987881,
+ "grad_norm": 19532.361328125,
+ "learning_rate": 9.65958757349638e-05,
+ "loss": 0.4857,
+ "step": 35400
+ },
+ {
+ "epoch": 0.18290071767249164,
+ "grad_norm": 18227.626953125,
+ "learning_rate": 9.658261490076944e-05,
+ "loss": 0.4871,
+ "step": 35450
+ },
+ {
+ "epoch": 0.18315868765510446,
+ "grad_norm": 21021.564453125,
+ "learning_rate": 9.656932920167727e-05,
+ "loss": 0.485,
+ "step": 35500
+ },
+ {
+ "epoch": 0.18341665763771728,
+ "grad_norm": 19943.9765625,
+ "learning_rate": 9.655601864477893e-05,
+ "loss": 0.4908,
+ "step": 35550
+ },
+ {
+ "epoch": 0.1836746276203301,
+ "grad_norm": 19356.8203125,
+ "learning_rate": 9.654268323717934e-05,
+ "loss": 0.4849,
+ "step": 35600
+ },
+ {
+ "epoch": 0.18393259760294292,
+ "grad_norm": 19431.9453125,
+ "learning_rate": 9.652932298599671e-05,
+ "loss": 0.4927,
+ "step": 35650
+ },
+ {
+ "epoch": 0.18419056758555574,
+ "grad_norm": 18860.0625,
+ "learning_rate": 9.651593789836242e-05,
+ "loss": 0.4879,
+ "step": 35700
+ },
+ {
+ "epoch": 0.18444853756816856,
+ "grad_norm": 18524.46875,
+ "learning_rate": 9.650252798142123e-05,
+ "loss": 0.4877,
+ "step": 35750
+ },
+ {
+ "epoch": 0.18470650755078138,
+ "grad_norm": 18897.322265625,
+ "learning_rate": 9.648909324233107e-05,
+ "loss": 0.4906,
+ "step": 35800
+ },
+ {
+ "epoch": 0.1849644775333942,
+ "grad_norm": 21080.552734375,
+ "learning_rate": 9.647563368826313e-05,
+ "loss": 0.4895,
+ "step": 35850
+ },
+ {
+ "epoch": 0.18522244751600703,
+ "grad_norm": 20014.828125,
+ "learning_rate": 9.64621493264019e-05,
+ "loss": 0.4816,
+ "step": 35900
+ },
+ {
+ "epoch": 0.18548041749861985,
+ "grad_norm": 19470.3984375,
+ "learning_rate": 9.644864016394504e-05,
+ "loss": 0.4812,
+ "step": 35950
+ },
+ {
+ "epoch": 0.1857383874812327,
+ "grad_norm": 21915.400390625,
+ "learning_rate": 9.643510620810348e-05,
+ "loss": 0.4859,
+ "step": 36000
+ },
+ {
+ "epoch": 0.18599635746384552,
+ "grad_norm": 19367.009765625,
+ "learning_rate": 9.642154746610139e-05,
+ "loss": 0.4905,
+ "step": 36050
+ },
+ {
+ "epoch": 0.18625432744645834,
+ "grad_norm": 18379.70703125,
+ "learning_rate": 9.640796394517616e-05,
+ "loss": 0.4878,
+ "step": 36100
+ },
+ {
+ "epoch": 0.18651229742907116,
+ "grad_norm": 18933.455078125,
+ "learning_rate": 9.639435565257842e-05,
+ "loss": 0.4877,
+ "step": 36150
+ },
+ {
+ "epoch": 0.18677026741168398,
+ "grad_norm": 19026.484375,
+ "learning_rate": 9.638072259557201e-05,
+ "loss": 0.4873,
+ "step": 36200
+ },
+ {
+ "epoch": 0.1870282373942968,
+ "grad_norm": 21111.09375,
+ "learning_rate": 9.636706478143398e-05,
+ "loss": 0.4815,
+ "step": 36250
+ },
+ {
+ "epoch": 0.18728620737690962,
+ "grad_norm": 19362.541015625,
+ "learning_rate": 9.635338221745462e-05,
+ "loss": 0.4854,
+ "step": 36300
+ },
+ {
+ "epoch": 0.18754417735952245,
+ "grad_norm": 19861.58984375,
+ "learning_rate": 9.63396749109374e-05,
+ "loss": 0.4832,
+ "step": 36350
+ },
+ {
+ "epoch": 0.18780214734213527,
+ "grad_norm": 18793.623046875,
+ "learning_rate": 9.632594286919905e-05,
+ "loss": 0.4811,
+ "step": 36400
+ },
+ {
+ "epoch": 0.1880601173247481,
+ "grad_norm": 20452.26953125,
+ "learning_rate": 9.631218609956943e-05,
+ "loss": 0.4872,
+ "step": 36450
+ },
+ {
+ "epoch": 0.1883180873073609,
+ "grad_norm": 19237.203125,
+ "learning_rate": 9.629840460939165e-05,
+ "loss": 0.4941,
+ "step": 36500
+ },
+ {
+ "epoch": 0.18857605728997373,
+ "grad_norm": 19828.84765625,
+ "learning_rate": 9.628459840602202e-05,
+ "loss": 0.4869,
+ "step": 36550
+ },
+ {
+ "epoch": 0.18883402727258655,
+ "grad_norm": 18171.08203125,
+ "learning_rate": 9.627076749683e-05,
+ "loss": 0.4915,
+ "step": 36600
+ },
+ {
+ "epoch": 0.18909199725519937,
+ "grad_norm": 21346.9375,
+ "learning_rate": 9.625691188919827e-05,
+ "loss": 0.4913,
+ "step": 36650
+ },
+ {
+ "epoch": 0.1893499672378122,
+ "grad_norm": 20066.7890625,
+ "learning_rate": 9.62430315905227e-05,
+ "loss": 0.4809,
+ "step": 36700
+ },
+ {
+ "epoch": 0.18960793722042504,
+ "grad_norm": 20736.546875,
+ "learning_rate": 9.622912660821231e-05,
+ "loss": 0.4849,
+ "step": 36750
+ },
+ {
+ "epoch": 0.18986590720303786,
+ "grad_norm": 20891.958984375,
+ "learning_rate": 9.62151969496893e-05,
+ "loss": 0.4831,
+ "step": 36800
+ },
+ {
+ "epoch": 0.19012387718565069,
+ "grad_norm": 21394.1953125,
+ "learning_rate": 9.620124262238908e-05,
+ "loss": 0.4855,
+ "step": 36850
+ },
+ {
+ "epoch": 0.1903818471682635,
+ "grad_norm": 19725.89453125,
+ "learning_rate": 9.618726363376016e-05,
+ "loss": 0.48,
+ "step": 36900
+ },
+ {
+ "epoch": 0.19063981715087633,
+ "grad_norm": 21622.78125,
+ "learning_rate": 9.617325999126429e-05,
+ "loss": 0.4832,
+ "step": 36950
+ },
+ {
+ "epoch": 0.19089778713348915,
+ "grad_norm": 22529.548828125,
+ "learning_rate": 9.615923170237633e-05,
+ "loss": 0.4852,
+ "step": 37000
+ },
+ {
+ "epoch": 0.19115575711610197,
+ "grad_norm": 21136.404296875,
+ "learning_rate": 9.614517877458428e-05,
+ "loss": 0.4816,
+ "step": 37050
+ },
+ {
+ "epoch": 0.1914137270987148,
+ "grad_norm": 19039.330078125,
+ "learning_rate": 9.61311012153894e-05,
+ "loss": 0.4835,
+ "step": 37100
+ },
+ {
+ "epoch": 0.1916716970813276,
+ "grad_norm": 19755.974609375,
+ "learning_rate": 9.611699903230594e-05,
+ "loss": 0.4846,
+ "step": 37150
+ },
+ {
+ "epoch": 0.19192966706394043,
+ "grad_norm": 19061.28515625,
+ "learning_rate": 9.610287223286139e-05,
+ "loss": 0.4816,
+ "step": 37200
+ },
+ {
+ "epoch": 0.19218763704655326,
+ "grad_norm": 21649.275390625,
+ "learning_rate": 9.608872082459639e-05,
+ "loss": 0.4837,
+ "step": 37250
+ },
+ {
+ "epoch": 0.19244560702916608,
+ "grad_norm": 19856.759765625,
+ "learning_rate": 9.607454481506466e-05,
+ "loss": 0.4848,
+ "step": 37300
+ },
+ {
+ "epoch": 0.1927035770117789,
+ "grad_norm": 19442.810546875,
+ "learning_rate": 9.60603442118331e-05,
+ "loss": 0.4828,
+ "step": 37350
+ },
+ {
+ "epoch": 0.19296154699439172,
+ "grad_norm": 20076.44140625,
+ "learning_rate": 9.604611902248168e-05,
+ "loss": 0.4896,
+ "step": 37400
+ },
+ {
+ "epoch": 0.19321951697700457,
+ "grad_norm": 18413.908203125,
+ "learning_rate": 9.603186925460359e-05,
+ "loss": 0.4806,
+ "step": 37450
+ },
+ {
+ "epoch": 0.1934774869596174,
+ "grad_norm": 19618.3984375,
+ "learning_rate": 9.601759491580503e-05,
+ "loss": 0.4864,
+ "step": 37500
+ },
+ {
+ "epoch": 0.1937354569422302,
+ "grad_norm": 20347.177734375,
+ "learning_rate": 9.600329601370539e-05,
+ "loss": 0.489,
+ "step": 37550
+ },
+ {
+ "epoch": 0.19399342692484303,
+ "grad_norm": 19288.380859375,
+ "learning_rate": 9.598897255593713e-05,
+ "loss": 0.4829,
+ "step": 37600
+ },
+ {
+ "epoch": 0.19425139690745585,
+ "grad_norm": 20326.1484375,
+ "learning_rate": 9.597462455014585e-05,
+ "loss": 0.4856,
+ "step": 37650
+ },
+ {
+ "epoch": 0.19450936689006867,
+ "grad_norm": 19598.14453125,
+ "learning_rate": 9.596025200399024e-05,
+ "loss": 0.4831,
+ "step": 37700
+ },
+ {
+ "epoch": 0.1947673368726815,
+ "grad_norm": 20041.28125,
+ "learning_rate": 9.594585492514205e-05,
+ "loss": 0.4822,
+ "step": 37750
+ },
+ {
+ "epoch": 0.19502530685529432,
+ "grad_norm": 20853.201171875,
+ "learning_rate": 9.593143332128623e-05,
+ "loss": 0.4874,
+ "step": 37800
+ },
+ {
+ "epoch": 0.19528327683790714,
+ "grad_norm": 21364.455078125,
+ "learning_rate": 9.591698720012068e-05,
+ "loss": 0.482,
+ "step": 37850
+ },
+ {
+ "epoch": 0.19554124682051996,
+ "grad_norm": 18795.447265625,
+ "learning_rate": 9.590251656935652e-05,
+ "loss": 0.489,
+ "step": 37900
+ },
+ {
+ "epoch": 0.19579921680313278,
+ "grad_norm": 23039.455078125,
+ "learning_rate": 9.588802143671784e-05,
+ "loss": 0.4879,
+ "step": 37950
+ },
+ {
+ "epoch": 0.1960571867857456,
+ "grad_norm": 19842.263671875,
+ "learning_rate": 9.58735018099419e-05,
+ "loss": 0.4869,
+ "step": 38000
+ },
+ {
+ "epoch": 0.19631515676835842,
+ "grad_norm": 21241.00390625,
+ "learning_rate": 9.585895769677897e-05,
+ "loss": 0.4746,
+ "step": 38050
+ },
+ {
+ "epoch": 0.19657312675097124,
+ "grad_norm": 19803.2265625,
+ "learning_rate": 9.584438910499245e-05,
+ "loss": 0.4824,
+ "step": 38100
+ },
+ {
+ "epoch": 0.1968310967335841,
+ "grad_norm": 18873.744140625,
+ "learning_rate": 9.582979604235873e-05,
+ "loss": 0.4817,
+ "step": 38150
+ },
+ {
+ "epoch": 0.19708906671619691,
+ "grad_norm": 19128.8828125,
+ "learning_rate": 9.581517851666734e-05,
+ "loss": 0.482,
+ "step": 38200
+ },
+ {
+ "epoch": 0.19734703669880974,
+ "grad_norm": 20514.16796875,
+ "learning_rate": 9.580053653572081e-05,
+ "loss": 0.4781,
+ "step": 38250
+ },
+ {
+ "epoch": 0.19760500668142256,
+ "grad_norm": 19135.58984375,
+ "learning_rate": 9.578587010733475e-05,
+ "loss": 0.4815,
+ "step": 38300
+ },
+ {
+ "epoch": 0.19786297666403538,
+ "grad_norm": 22849.197265625,
+ "learning_rate": 9.577117923933782e-05,
+ "loss": 0.4794,
+ "step": 38350
+ },
+ {
+ "epoch": 0.1981209466466482,
+ "grad_norm": 21278.736328125,
+ "learning_rate": 9.575646393957173e-05,
+ "loss": 0.4832,
+ "step": 38400
+ },
+ {
+ "epoch": 0.19837891662926102,
+ "grad_norm": 19292.162109375,
+ "learning_rate": 9.57417242158912e-05,
+ "loss": 0.4876,
+ "step": 38450
+ },
+ {
+ "epoch": 0.19863688661187384,
+ "grad_norm": 17778.423828125,
+ "learning_rate": 9.572696007616402e-05,
+ "loss": 0.4842,
+ "step": 38500
+ },
+ {
+ "epoch": 0.19889485659448666,
+ "grad_norm": 18855.140625,
+ "learning_rate": 9.5712171528271e-05,
+ "loss": 0.4846,
+ "step": 38550
+ },
+ {
+ "epoch": 0.19915282657709948,
+ "grad_norm": 21640.8203125,
+ "learning_rate": 9.5697358580106e-05,
+ "loss": 0.4829,
+ "step": 38600
+ },
+ {
+ "epoch": 0.1994107965597123,
+ "grad_norm": 19358.3828125,
+ "learning_rate": 9.568252123957586e-05,
+ "loss": 0.4806,
+ "step": 38650
+ },
+ {
+ "epoch": 0.19966876654232513,
+ "grad_norm": 20781.98828125,
+ "learning_rate": 9.566765951460046e-05,
+ "loss": 0.4849,
+ "step": 38700
+ },
+ {
+ "epoch": 0.19992673652493795,
+ "grad_norm": 20604.7265625,
+ "learning_rate": 9.565277341311271e-05,
+ "loss": 0.4856,
+ "step": 38750
+ },
+ {
+ "epoch": 0.20018470650755077,
+ "grad_norm": 20930.048828125,
+ "learning_rate": 9.563786294305854e-05,
+ "loss": 0.4812,
+ "step": 38800
+ },
+ {
+ "epoch": 0.20044267649016362,
+ "grad_norm": 22721.259765625,
+ "learning_rate": 9.562292811239686e-05,
+ "loss": 0.4857,
+ "step": 38850
+ },
+ {
+ "epoch": 0.20070064647277644,
+ "grad_norm": 19667.57421875,
+ "learning_rate": 9.560796892909957e-05,
+ "loss": 0.483,
+ "step": 38900
+ },
+ {
+ "epoch": 0.20095861645538926,
+ "grad_norm": 18259.19140625,
+ "learning_rate": 9.559298540115164e-05,
+ "loss": 0.4851,
+ "step": 38950
+ },
+ {
+ "epoch": 0.20121658643800208,
+ "grad_norm": 20980.18359375,
+ "learning_rate": 9.557797753655096e-05,
+ "loss": 0.4815,
+ "step": 39000
+ },
+ {
+ "epoch": 0.2014745564206149,
+ "grad_norm": 19840.025390625,
+ "learning_rate": 9.556294534330841e-05,
+ "loss": 0.4878,
+ "step": 39050
+ },
+ {
+ "epoch": 0.20173252640322772,
+ "grad_norm": 20406.69921875,
+ "learning_rate": 9.554788882944792e-05,
+ "loss": 0.481,
+ "step": 39100
+ },
+ {
+ "epoch": 0.20199049638584055,
+ "grad_norm": 19177.447265625,
+ "learning_rate": 9.553280800300637e-05,
+ "loss": 0.4857,
+ "step": 39150
+ },
+ {
+ "epoch": 0.20224846636845337,
+ "grad_norm": 21242.21875,
+ "learning_rate": 9.551770287203359e-05,
+ "loss": 0.4889,
+ "step": 39200
+ },
+ {
+ "epoch": 0.2025064363510662,
+ "grad_norm": 19343.58203125,
+ "learning_rate": 9.550257344459241e-05,
+ "loss": 0.482,
+ "step": 39250
+ },
+ {
+ "epoch": 0.202764406333679,
+ "grad_norm": 21327.587890625,
+ "learning_rate": 9.548741972875863e-05,
+ "loss": 0.4802,
+ "step": 39300
+ },
+ {
+ "epoch": 0.20302237631629183,
+ "grad_norm": 21366.98828125,
+ "learning_rate": 9.547224173262102e-05,
+ "loss": 0.4779,
+ "step": 39350
+ },
+ {
+ "epoch": 0.20328034629890465,
+ "grad_norm": 20876.39453125,
+ "learning_rate": 9.545703946428128e-05,
+ "loss": 0.4843,
+ "step": 39400
+ },
+ {
+ "epoch": 0.20353831628151747,
+ "grad_norm": 21280.873046875,
+ "learning_rate": 9.544181293185413e-05,
+ "loss": 0.4805,
+ "step": 39450
+ },
+ {
+ "epoch": 0.2037962862641303,
+ "grad_norm": 19546.134765625,
+ "learning_rate": 9.542656214346713e-05,
+ "loss": 0.4753,
+ "step": 39500
+ },
+ {
+ "epoch": 0.20405425624674312,
+ "grad_norm": 19179.05859375,
+ "learning_rate": 9.541128710726091e-05,
+ "loss": 0.4812,
+ "step": 39550
+ },
+ {
+ "epoch": 0.20431222622935596,
+ "grad_norm": 23525.50390625,
+ "learning_rate": 9.539598783138897e-05,
+ "loss": 0.4843,
+ "step": 39600
+ },
+ {
+ "epoch": 0.20457019621196879,
+ "grad_norm": 19369.103515625,
+ "learning_rate": 9.538066432401775e-05,
+ "loss": 0.4788,
+ "step": 39650
+ },
+ {
+ "epoch": 0.2048281661945816,
+ "grad_norm": 20777.119140625,
+ "learning_rate": 9.536531659332667e-05,
+ "loss": 0.4779,
+ "step": 39700
+ },
+ {
+ "epoch": 0.20508613617719443,
+ "grad_norm": 18987.701171875,
+ "learning_rate": 9.534994464750806e-05,
+ "loss": 0.4807,
+ "step": 39750
+ },
+ {
+ "epoch": 0.20534410615980725,
+ "grad_norm": 19523.873046875,
+ "learning_rate": 9.533454849476712e-05,
+ "loss": 0.4798,
+ "step": 39800
+ },
+ {
+ "epoch": 0.20560207614242007,
+ "grad_norm": 21302.05859375,
+ "learning_rate": 9.531912814332206e-05,
+ "loss": 0.4811,
+ "step": 39850
+ },
+ {
+ "epoch": 0.2058600461250329,
+ "grad_norm": 21545.626953125,
+ "learning_rate": 9.530368360140394e-05,
+ "loss": 0.4814,
+ "step": 39900
+ },
+ {
+ "epoch": 0.2061180161076457,
+ "grad_norm": 22709.7265625,
+ "learning_rate": 9.528821487725678e-05,
+ "loss": 0.4827,
+ "step": 39950
+ },
+ {
+ "epoch": 0.20637598609025853,
+ "grad_norm": 20853.228515625,
+ "learning_rate": 9.527272197913746e-05,
+ "loss": 0.4838,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20637598609025853,
+ "eval_loss": 0.47092095017433167,
+ "eval_runtime": 3339.7722,
+ "eval_samples_per_second": 928.542,
+ "eval_steps_per_second": 1.814,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20663395607287136,
+ "grad_norm": 18389.748046875,
+ "learning_rate": 9.525720491531581e-05,
+ "loss": 0.4809,
+ "step": 40050
+ },
+ {
+ "epoch": 0.20689192605548418,
+ "grad_norm": 20328.59765625,
+ "learning_rate": 9.524166369407453e-05,
+ "loss": 0.4827,
+ "step": 40100
+ },
+ {
+ "epoch": 0.207149896038097,
+ "grad_norm": 21094.966796875,
+ "learning_rate": 9.522609832370924e-05,
+ "loss": 0.484,
+ "step": 40150
+ },
+ {
+ "epoch": 0.20740786602070982,
+ "grad_norm": 22630.64453125,
+ "learning_rate": 9.52105088125284e-05,
+ "loss": 0.4829,
+ "step": 40200
+ },
+ {
+ "epoch": 0.20766583600332264,
+ "grad_norm": 19477.7265625,
+ "learning_rate": 9.51948951688534e-05,
+ "loss": 0.4793,
+ "step": 40250
+ },
+ {
+ "epoch": 0.2079238059859355,
+ "grad_norm": 20242.53125,
+ "learning_rate": 9.517925740101851e-05,
+ "loss": 0.4797,
+ "step": 40300
+ },
+ {
+ "epoch": 0.2081817759685483,
+ "grad_norm": 19952.421875,
+ "learning_rate": 9.516359551737087e-05,
+ "loss": 0.4785,
+ "step": 40350
+ },
+ {
+ "epoch": 0.20843974595116113,
+ "grad_norm": 19216.220703125,
+ "learning_rate": 9.514790952627049e-05,
+ "loss": 0.4753,
+ "step": 40400
+ },
+ {
+ "epoch": 0.20869771593377395,
+ "grad_norm": 20297.515625,
+ "learning_rate": 9.513219943609024e-05,
+ "loss": 0.4792,
+ "step": 40450
+ },
+ {
+ "epoch": 0.20895568591638677,
+ "grad_norm": 19528.7890625,
+ "learning_rate": 9.511646525521585e-05,
+ "loss": 0.4801,
+ "step": 40500
+ },
+ {
+ "epoch": 0.2092136558989996,
+ "grad_norm": 18037.7890625,
+ "learning_rate": 9.510070699204597e-05,
+ "loss": 0.483,
+ "step": 40550
+ },
+ {
+ "epoch": 0.20947162588161242,
+ "grad_norm": 20636.4296875,
+ "learning_rate": 9.508492465499199e-05,
+ "loss": 0.4761,
+ "step": 40600
+ },
+ {
+ "epoch": 0.20972959586422524,
+ "grad_norm": 20096.857421875,
+ "learning_rate": 9.506911825247827e-05,
+ "loss": 0.4804,
+ "step": 40650
+ },
+ {
+ "epoch": 0.20998756584683806,
+ "grad_norm": 20855.619140625,
+ "learning_rate": 9.505328779294192e-05,
+ "loss": 0.4823,
+ "step": 40700
+ },
+ {
+ "epoch": 0.21024553582945088,
+ "grad_norm": 19640.521484375,
+ "learning_rate": 9.503743328483296e-05,
+ "loss": 0.4818,
+ "step": 40750
+ },
+ {
+ "epoch": 0.2105035058120637,
+ "grad_norm": 20990.525390625,
+ "learning_rate": 9.50215547366142e-05,
+ "loss": 0.4804,
+ "step": 40800
+ },
+ {
+ "epoch": 0.21076147579467652,
+ "grad_norm": 18773.564453125,
+ "learning_rate": 9.500565215676132e-05,
+ "loss": 0.4798,
+ "step": 40850
+ },
+ {
+ "epoch": 0.21101944577728934,
+ "grad_norm": 18688.7265625,
+ "learning_rate": 9.498972555376282e-05,
+ "loss": 0.4773,
+ "step": 40900
+ },
+ {
+ "epoch": 0.21127741575990217,
+ "grad_norm": 22649.3671875,
+ "learning_rate": 9.497377493611998e-05,
+ "loss": 0.478,
+ "step": 40950
+ },
+ {
+ "epoch": 0.21153538574251501,
+ "grad_norm": 19575.95703125,
+ "learning_rate": 9.495780031234694e-05,
+ "loss": 0.4809,
+ "step": 41000
+ },
+ {
+ "epoch": 0.21179335572512784,
+ "grad_norm": 18587.681640625,
+ "learning_rate": 9.494180169097067e-05,
+ "loss": 0.4805,
+ "step": 41050
+ },
+ {
+ "epoch": 0.21205132570774066,
+ "grad_norm": 19466.5703125,
+ "learning_rate": 9.492577908053089e-05,
+ "loss": 0.4772,
+ "step": 41100
+ },
+ {
+ "epoch": 0.21230929569035348,
+ "grad_norm": 21085.15234375,
+ "learning_rate": 9.490973248958018e-05,
+ "loss": 0.4787,
+ "step": 41150
+ },
+ {
+ "epoch": 0.2125672656729663,
+ "grad_norm": 21866.95703125,
+ "learning_rate": 9.489366192668388e-05,
+ "loss": 0.4803,
+ "step": 41200
+ },
+ {
+ "epoch": 0.21282523565557912,
+ "grad_norm": 20759.609375,
+ "learning_rate": 9.487756740042015e-05,
+ "loss": 0.4782,
+ "step": 41250
+ },
+ {
+ "epoch": 0.21308320563819194,
+ "grad_norm": 20565.51171875,
+ "learning_rate": 9.486144891937997e-05,
+ "loss": 0.4765,
+ "step": 41300
+ },
+ {
+ "epoch": 0.21334117562080476,
+ "grad_norm": 21536.017578125,
+ "learning_rate": 9.484530649216705e-05,
+ "loss": 0.4753,
+ "step": 41350
+ },
+ {
+ "epoch": 0.21359914560341758,
+ "grad_norm": 19452.001953125,
+ "learning_rate": 9.482914012739788e-05,
+ "loss": 0.4807,
+ "step": 41400
+ },
+ {
+ "epoch": 0.2138571155860304,
+ "grad_norm": 21220.927734375,
+ "learning_rate": 9.481294983370179e-05,
+ "loss": 0.4803,
+ "step": 41450
+ },
+ {
+ "epoch": 0.21411508556864323,
+ "grad_norm": 18278.884765625,
+ "learning_rate": 9.479673561972082e-05,
+ "loss": 0.4807,
+ "step": 41500
+ },
+ {
+ "epoch": 0.21437305555125605,
+ "grad_norm": 21568.13671875,
+ "learning_rate": 9.478049749410983e-05,
+ "loss": 0.4751,
+ "step": 41550
+ },
+ {
+ "epoch": 0.21463102553386887,
+ "grad_norm": 21004.734375,
+ "learning_rate": 9.47642354655364e-05,
+ "loss": 0.4828,
+ "step": 41600
+ },
+ {
+ "epoch": 0.2148889955164817,
+ "grad_norm": 20709.193359375,
+ "learning_rate": 9.474794954268089e-05,
+ "loss": 0.477,
+ "step": 41650
+ },
+ {
+ "epoch": 0.21514696549909454,
+ "grad_norm": 21408.3671875,
+ "learning_rate": 9.47316397342364e-05,
+ "loss": 0.4783,
+ "step": 41700
+ },
+ {
+ "epoch": 0.21540493548170736,
+ "grad_norm": 18606.6328125,
+ "learning_rate": 9.47153060489088e-05,
+ "loss": 0.4771,
+ "step": 41750
+ },
+ {
+ "epoch": 0.21566290546432018,
+ "grad_norm": 19498.20703125,
+ "learning_rate": 9.469894849541667e-05,
+ "loss": 0.4782,
+ "step": 41800
+ },
+ {
+ "epoch": 0.215920875446933,
+ "grad_norm": 20441.9765625,
+ "learning_rate": 9.46825670824914e-05,
+ "loss": 0.4769,
+ "step": 41850
+ },
+ {
+ "epoch": 0.21617884542954582,
+ "grad_norm": 20925.109375,
+ "learning_rate": 9.466616181887704e-05,
+ "loss": 0.4858,
+ "step": 41900
+ },
+ {
+ "epoch": 0.21643681541215865,
+ "grad_norm": 21410.38671875,
+ "learning_rate": 9.464973271333042e-05,
+ "loss": 0.4791,
+ "step": 41950
+ },
+ {
+ "epoch": 0.21669478539477147,
+ "grad_norm": 19169.583984375,
+ "learning_rate": 9.463327977462106e-05,
+ "loss": 0.4783,
+ "step": 42000
+ },
+ {
+ "epoch": 0.2169527553773843,
+ "grad_norm": 19487.3359375,
+ "learning_rate": 9.461680301153124e-05,
+ "loss": 0.4792,
+ "step": 42050
+ },
+ {
+ "epoch": 0.2172107253599971,
+ "grad_norm": 21303.861328125,
+ "learning_rate": 9.460030243285592e-05,
+ "loss": 0.4811,
+ "step": 42100
+ },
+ {
+ "epoch": 0.21746869534260993,
+ "grad_norm": 21529.490234375,
+ "learning_rate": 9.458377804740279e-05,
+ "loss": 0.4761,
+ "step": 42150
+ },
+ {
+ "epoch": 0.21772666532522275,
+ "grad_norm": 21356.505859375,
+ "learning_rate": 9.456722986399227e-05,
+ "loss": 0.477,
+ "step": 42200
+ },
+ {
+ "epoch": 0.21798463530783557,
+ "grad_norm": 19551.33203125,
+ "learning_rate": 9.455065789145742e-05,
+ "loss": 0.4777,
+ "step": 42250
+ },
+ {
+ "epoch": 0.2182426052904484,
+ "grad_norm": 21424.58984375,
+ "learning_rate": 9.453406213864408e-05,
+ "loss": 0.4759,
+ "step": 42300
+ },
+ {
+ "epoch": 0.21850057527306121,
+ "grad_norm": 18835.1953125,
+ "learning_rate": 9.451744261441072e-05,
+ "loss": 0.4749,
+ "step": 42350
+ },
+ {
+ "epoch": 0.21875854525567404,
+ "grad_norm": 20333.490234375,
+ "learning_rate": 9.450079932762852e-05,
+ "loss": 0.4786,
+ "step": 42400
+ },
+ {
+ "epoch": 0.21901651523828689,
+ "grad_norm": 18957.232421875,
+ "learning_rate": 9.448413228718134e-05,
+ "loss": 0.4778,
+ "step": 42450
+ },
+ {
+ "epoch": 0.2192744852208997,
+ "grad_norm": 20251.939453125,
+ "learning_rate": 9.446744150196574e-05,
+ "loss": 0.4759,
+ "step": 42500
+ },
+ {
+ "epoch": 0.21953245520351253,
+ "grad_norm": 20740.82421875,
+ "learning_rate": 9.445072698089091e-05,
+ "loss": 0.4782,
+ "step": 42550
+ },
+ {
+ "epoch": 0.21979042518612535,
+ "grad_norm": 19501.91015625,
+ "learning_rate": 9.443398873287877e-05,
+ "loss": 0.479,
+ "step": 42600
+ },
+ {
+ "epoch": 0.22004839516873817,
+ "grad_norm": 20895.58984375,
+ "learning_rate": 9.441722676686386e-05,
+ "loss": 0.4754,
+ "step": 42650
+ },
+ {
+ "epoch": 0.220306365151351,
+ "grad_norm": 19932.66796875,
+ "learning_rate": 9.440044109179338e-05,
+ "loss": 0.4778,
+ "step": 42700
+ },
+ {
+ "epoch": 0.2205643351339638,
+ "grad_norm": 20158.693359375,
+ "learning_rate": 9.438363171662722e-05,
+ "loss": 0.4755,
+ "step": 42750
+ },
+ {
+ "epoch": 0.22082230511657663,
+ "grad_norm": 19128.953125,
+ "learning_rate": 9.436679865033789e-05,
+ "loss": 0.4744,
+ "step": 42800
+ },
+ {
+ "epoch": 0.22108027509918945,
+ "grad_norm": 19743.517578125,
+ "learning_rate": 9.434994190191054e-05,
+ "loss": 0.4781,
+ "step": 42850
+ },
+ {
+ "epoch": 0.22133824508180228,
+ "grad_norm": 17826.703125,
+ "learning_rate": 9.4333061480343e-05,
+ "loss": 0.4762,
+ "step": 42900
+ },
+ {
+ "epoch": 0.2215962150644151,
+ "grad_norm": 20606.48046875,
+ "learning_rate": 9.43161573946457e-05,
+ "loss": 0.4741,
+ "step": 42950
+ },
+ {
+ "epoch": 0.22185418504702792,
+ "grad_norm": 20116.66796875,
+ "learning_rate": 9.429922965384172e-05,
+ "loss": 0.4766,
+ "step": 43000
+ },
+ {
+ "epoch": 0.22211215502964074,
+ "grad_norm": 20560.970703125,
+ "learning_rate": 9.428227826696674e-05,
+ "loss": 0.481,
+ "step": 43050
+ },
+ {
+ "epoch": 0.22237012501225356,
+ "grad_norm": 20832.01953125,
+ "learning_rate": 9.42653032430691e-05,
+ "loss": 0.4806,
+ "step": 43100
+ },
+ {
+ "epoch": 0.2226280949948664,
+ "grad_norm": 18686.953125,
+ "learning_rate": 9.424830459120974e-05,
+ "loss": 0.4796,
+ "step": 43150
+ },
+ {
+ "epoch": 0.22288606497747923,
+ "grad_norm": 21061.240234375,
+ "learning_rate": 9.423128232046223e-05,
+ "loss": 0.474,
+ "step": 43200
+ },
+ {
+ "epoch": 0.22314403496009205,
+ "grad_norm": 21862.25,
+ "learning_rate": 9.421423643991267e-05,
+ "loss": 0.4721,
+ "step": 43250
+ },
+ {
+ "epoch": 0.22340200494270487,
+ "grad_norm": 18299.23828125,
+ "learning_rate": 9.419716695865988e-05,
+ "loss": 0.4744,
+ "step": 43300
+ },
+ {
+ "epoch": 0.2236599749253177,
+ "grad_norm": 20387.876953125,
+ "learning_rate": 9.418007388581517e-05,
+ "loss": 0.4748,
+ "step": 43350
+ },
+ {
+ "epoch": 0.22391794490793052,
+ "grad_norm": 21721.740234375,
+ "learning_rate": 9.416295723050254e-05,
+ "loss": 0.4782,
+ "step": 43400
+ },
+ {
+ "epoch": 0.22417591489054334,
+ "grad_norm": 20274.72265625,
+ "learning_rate": 9.414581700185851e-05,
+ "loss": 0.4734,
+ "step": 43450
+ },
+ {
+ "epoch": 0.22443388487315616,
+ "grad_norm": 22443.296875,
+ "learning_rate": 9.41286532090322e-05,
+ "loss": 0.4734,
+ "step": 43500
+ },
+ {
+ "epoch": 0.22469185485576898,
+ "grad_norm": 19874.8203125,
+ "learning_rate": 9.411146586118529e-05,
+ "loss": 0.4755,
+ "step": 43550
+ },
+ {
+ "epoch": 0.2249498248383818,
+ "grad_norm": 20362.3125,
+ "learning_rate": 9.409425496749209e-05,
+ "loss": 0.4776,
+ "step": 43600
+ },
+ {
+ "epoch": 0.22520779482099462,
+ "grad_norm": 22146.5078125,
+ "learning_rate": 9.40770205371394e-05,
+ "loss": 0.4784,
+ "step": 43650
+ },
+ {
+ "epoch": 0.22546576480360744,
+ "grad_norm": 19917.83203125,
+ "learning_rate": 9.405976257932667e-05,
+ "loss": 0.4744,
+ "step": 43700
+ },
+ {
+ "epoch": 0.22572373478622026,
+ "grad_norm": 19296.904296875,
+ "learning_rate": 9.404248110326583e-05,
+ "loss": 0.4766,
+ "step": 43750
+ },
+ {
+ "epoch": 0.22598170476883309,
+ "grad_norm": 20648.35546875,
+ "learning_rate": 9.402517611818142e-05,
+ "loss": 0.4801,
+ "step": 43800
+ },
+ {
+ "epoch": 0.22623967475144593,
+ "grad_norm": 21750.517578125,
+ "learning_rate": 9.40078476333105e-05,
+ "loss": 0.4752,
+ "step": 43850
+ },
+ {
+ "epoch": 0.22649764473405876,
+ "grad_norm": 21233.337890625,
+ "learning_rate": 9.399049565790266e-05,
+ "loss": 0.4758,
+ "step": 43900
+ },
+ {
+ "epoch": 0.22675561471667158,
+ "grad_norm": 21952.6796875,
+ "learning_rate": 9.397312020122006e-05,
+ "loss": 0.4755,
+ "step": 43950
+ },
+ {
+ "epoch": 0.2270135846992844,
+ "grad_norm": 18598.826171875,
+ "learning_rate": 9.39557212725374e-05,
+ "loss": 0.4725,
+ "step": 44000
+ },
+ {
+ "epoch": 0.22727155468189722,
+ "grad_norm": 20325.51171875,
+ "learning_rate": 9.393829888114188e-05,
+ "loss": 0.4789,
+ "step": 44050
+ },
+ {
+ "epoch": 0.22752952466451004,
+ "grad_norm": 17499.228515625,
+ "learning_rate": 9.392085303633323e-05,
+ "loss": 0.4738,
+ "step": 44100
+ },
+ {
+ "epoch": 0.22778749464712286,
+ "grad_norm": 21283.970703125,
+ "learning_rate": 9.39033837474237e-05,
+ "loss": 0.4743,
+ "step": 44150
+ },
+ {
+ "epoch": 0.22804546462973568,
+ "grad_norm": 19672.765625,
+ "learning_rate": 9.388589102373807e-05,
+ "loss": 0.4751,
+ "step": 44200
+ },
+ {
+ "epoch": 0.2283034346123485,
+ "grad_norm": 19722.314453125,
+ "learning_rate": 9.386837487461361e-05,
+ "loss": 0.4767,
+ "step": 44250
+ },
+ {
+ "epoch": 0.22856140459496133,
+ "grad_norm": 19948.154296875,
+ "learning_rate": 9.38508353094001e-05,
+ "loss": 0.4765,
+ "step": 44300
+ },
+ {
+ "epoch": 0.22881937457757415,
+ "grad_norm": 19880.611328125,
+ "learning_rate": 9.383327233745984e-05,
+ "loss": 0.4754,
+ "step": 44350
+ },
+ {
+ "epoch": 0.22907734456018697,
+ "grad_norm": 20052.91796875,
+ "learning_rate": 9.381568596816757e-05,
+ "loss": 0.4801,
+ "step": 44400
+ },
+ {
+ "epoch": 0.2293353145427998,
+ "grad_norm": 23129.869140625,
+ "learning_rate": 9.379807621091057e-05,
+ "loss": 0.4713,
+ "step": 44450
+ },
+ {
+ "epoch": 0.2295932845254126,
+ "grad_norm": 19922.0703125,
+ "learning_rate": 9.37804430750886e-05,
+ "loss": 0.4736,
+ "step": 44500
+ },
+ {
+ "epoch": 0.22985125450802546,
+ "grad_norm": 19704.24609375,
+ "learning_rate": 9.376278657011388e-05,
+ "loss": 0.4682,
+ "step": 44550
+ },
+ {
+ "epoch": 0.23010922449063828,
+ "grad_norm": 19080.125,
+ "learning_rate": 9.374510670541109e-05,
+ "loss": 0.4751,
+ "step": 44600
+ },
+ {
+ "epoch": 0.2303671944732511,
+ "grad_norm": 20858.388671875,
+ "learning_rate": 9.372740349041742e-05,
+ "loss": 0.4734,
+ "step": 44650
+ },
+ {
+ "epoch": 0.23062516445586392,
+ "grad_norm": 22074.056640625,
+ "learning_rate": 9.37096769345825e-05,
+ "loss": 0.4699,
+ "step": 44700
+ },
+ {
+ "epoch": 0.23088313443847674,
+ "grad_norm": 21852.623046875,
+ "learning_rate": 9.369192704736842e-05,
+ "loss": 0.47,
+ "step": 44750
+ },
+ {
+ "epoch": 0.23114110442108957,
+ "grad_norm": 20904.033203125,
+ "learning_rate": 9.367415383824974e-05,
+ "loss": 0.4736,
+ "step": 44800
+ },
+ {
+ "epoch": 0.2313990744037024,
+ "grad_norm": 18965.021484375,
+ "learning_rate": 9.365635731671343e-05,
+ "loss": 0.4687,
+ "step": 44850
+ },
+ {
+ "epoch": 0.2316570443863152,
+ "grad_norm": 16994.271484375,
+ "learning_rate": 9.363853749225894e-05,
+ "loss": 0.4747,
+ "step": 44900
+ },
+ {
+ "epoch": 0.23191501436892803,
+ "grad_norm": 19191.794921875,
+ "learning_rate": 9.362069437439814e-05,
+ "loss": 0.4689,
+ "step": 44950
+ },
+ {
+ "epoch": 0.23217298435154085,
+ "grad_norm": 19691.982421875,
+ "learning_rate": 9.360282797265537e-05,
+ "loss": 0.4683,
+ "step": 45000
+ },
+ {
+ "epoch": 0.23217298435154085,
+ "eval_loss": 0.4633353352546692,
+ "eval_runtime": 3256.5731,
+ "eval_samples_per_second": 952.265,
+ "eval_steps_per_second": 1.86,
+ "step": 45000
+ },
+ {
+ "epoch": 0.23243095433415367,
+ "grad_norm": 21778.20703125,
+ "learning_rate": 9.358493829656732e-05,
+ "loss": 0.4726,
+ "step": 45050
+ },
+ {
+ "epoch": 0.2326889243167665,
+ "grad_norm": 20281.802734375,
+ "learning_rate": 9.35670253556832e-05,
+ "loss": 0.4752,
+ "step": 45100
+ },
+ {
+ "epoch": 0.23294689429937931,
+ "grad_norm": 20620.580078125,
+ "learning_rate": 9.354908915956456e-05,
+ "loss": 0.474,
+ "step": 45150
+ },
+ {
+ "epoch": 0.23320486428199214,
+ "grad_norm": 21115.86328125,
+ "learning_rate": 9.353112971778542e-05,
+ "loss": 0.4763,
+ "step": 45200
+ },
+ {
+ "epoch": 0.23346283426460496,
+ "grad_norm": 19746.30859375,
+ "learning_rate": 9.351314703993215e-05,
+ "loss": 0.4792,
+ "step": 45250
+ },
+ {
+ "epoch": 0.2337208042472178,
+ "grad_norm": 21270.26171875,
+ "learning_rate": 9.349514113560358e-05,
+ "loss": 0.4726,
+ "step": 45300
+ },
+ {
+ "epoch": 0.23397877422983063,
+ "grad_norm": 20273.658203125,
+ "learning_rate": 9.347711201441092e-05,
+ "loss": 0.4683,
+ "step": 45350
+ },
+ {
+ "epoch": 0.23423674421244345,
+ "grad_norm": 19746.9609375,
+ "learning_rate": 9.345905968597773e-05,
+ "loss": 0.4778,
+ "step": 45400
+ },
+ {
+ "epoch": 0.23449471419505627,
+ "grad_norm": 22999.52734375,
+ "learning_rate": 9.344098415994003e-05,
+ "loss": 0.4799,
+ "step": 45450
+ },
+ {
+ "epoch": 0.2347526841776691,
+ "grad_norm": 19922.41015625,
+ "learning_rate": 9.342288544594617e-05,
+ "loss": 0.4773,
+ "step": 45500
+ },
+ {
+ "epoch": 0.2350106541602819,
+ "grad_norm": 19793.73828125,
+ "learning_rate": 9.340476355365688e-05,
+ "loss": 0.4743,
+ "step": 45550
+ },
+ {
+ "epoch": 0.23526862414289473,
+ "grad_norm": 19525.74609375,
+ "learning_rate": 9.33866184927453e-05,
+ "loss": 0.4729,
+ "step": 45600
+ },
+ {
+ "epoch": 0.23552659412550755,
+ "grad_norm": 26093.65625,
+ "learning_rate": 9.336845027289691e-05,
+ "loss": 0.4767,
+ "step": 45650
+ },
+ {
+ "epoch": 0.23578456410812038,
+ "grad_norm": 20045.16796875,
+ "learning_rate": 9.335025890380953e-05,
+ "loss": 0.4768,
+ "step": 45700
+ },
+ {
+ "epoch": 0.2360425340907332,
+ "grad_norm": 21272.36328125,
+ "learning_rate": 9.333204439519338e-05,
+ "loss": 0.4738,
+ "step": 45750
+ },
+ {
+ "epoch": 0.23630050407334602,
+ "grad_norm": 19174.44921875,
+ "learning_rate": 9.3313806756771e-05,
+ "loss": 0.4752,
+ "step": 45800
+ },
+ {
+ "epoch": 0.23655847405595884,
+ "grad_norm": 18446.640625,
+ "learning_rate": 9.32955459982773e-05,
+ "loss": 0.4747,
+ "step": 45850
+ },
+ {
+ "epoch": 0.23681644403857166,
+ "grad_norm": 23397.7109375,
+ "learning_rate": 9.327726212945953e-05,
+ "loss": 0.4723,
+ "step": 45900
+ },
+ {
+ "epoch": 0.23707441402118448,
+ "grad_norm": 20350.755859375,
+ "learning_rate": 9.325895516007725e-05,
+ "loss": 0.4671,
+ "step": 45950
+ },
+ {
+ "epoch": 0.23733238400379733,
+ "grad_norm": 21147.5546875,
+ "learning_rate": 9.324062509990235e-05,
+ "loss": 0.4689,
+ "step": 46000
+ },
+ {
+ "epoch": 0.23759035398641015,
+ "grad_norm": 19813.130859375,
+ "learning_rate": 9.322227195871909e-05,
+ "loss": 0.4723,
+ "step": 46050
+ },
+ {
+ "epoch": 0.23784832396902297,
+ "grad_norm": 22310.037109375,
+ "learning_rate": 9.320389574632399e-05,
+ "loss": 0.4727,
+ "step": 46100
+ },
+ {
+ "epoch": 0.2381062939516358,
+ "grad_norm": 19646.509765625,
+ "learning_rate": 9.318549647252596e-05,
+ "loss": 0.4723,
+ "step": 46150
+ },
+ {
+ "epoch": 0.23836426393424862,
+ "grad_norm": 20145.29296875,
+ "learning_rate": 9.316707414714614e-05,
+ "loss": 0.4652,
+ "step": 46200
+ },
+ {
+ "epoch": 0.23862223391686144,
+ "grad_norm": 19513.466796875,
+ "learning_rate": 9.314862878001803e-05,
+ "loss": 0.4774,
+ "step": 46250
+ },
+ {
+ "epoch": 0.23888020389947426,
+ "grad_norm": 20701.25390625,
+ "learning_rate": 9.313016038098739e-05,
+ "loss": 0.4721,
+ "step": 46300
+ },
+ {
+ "epoch": 0.23913817388208708,
+ "grad_norm": 18766.328125,
+ "learning_rate": 9.31116689599123e-05,
+ "loss": 0.4691,
+ "step": 46350
+ },
+ {
+ "epoch": 0.2393961438646999,
+ "grad_norm": 20925.5,
+ "learning_rate": 9.309315452666314e-05,
+ "loss": 0.4743,
+ "step": 46400
+ },
+ {
+ "epoch": 0.23965411384731272,
+ "grad_norm": 19413.0703125,
+ "learning_rate": 9.307461709112253e-05,
+ "loss": 0.469,
+ "step": 46450
+ },
+ {
+ "epoch": 0.23991208382992554,
+ "grad_norm": 18517.669921875,
+ "learning_rate": 9.305605666318543e-05,
+ "loss": 0.4769,
+ "step": 46500
+ },
+ {
+ "epoch": 0.24017005381253836,
+ "grad_norm": 20222.50390625,
+ "learning_rate": 9.3037473252759e-05,
+ "loss": 0.4701,
+ "step": 46550
+ },
+ {
+ "epoch": 0.24042802379515119,
+ "grad_norm": 21650.63671875,
+ "learning_rate": 9.301886686976272e-05,
+ "loss": 0.4693,
+ "step": 46600
+ },
+ {
+ "epoch": 0.240685993777764,
+ "grad_norm": 18923.498046875,
+ "learning_rate": 9.300023752412832e-05,
+ "loss": 0.4749,
+ "step": 46650
+ },
+ {
+ "epoch": 0.24094396376037686,
+ "grad_norm": 21353.748046875,
+ "learning_rate": 9.298158522579978e-05,
+ "loss": 0.4735,
+ "step": 46700
+ },
+ {
+ "epoch": 0.24120193374298968,
+ "grad_norm": 19405.5234375,
+ "learning_rate": 9.296290998473334e-05,
+ "loss": 0.4708,
+ "step": 46750
+ },
+ {
+ "epoch": 0.2414599037256025,
+ "grad_norm": 21692.3203125,
+ "learning_rate": 9.294421181089747e-05,
+ "loss": 0.4644,
+ "step": 46800
+ },
+ {
+ "epoch": 0.24171787370821532,
+ "grad_norm": 18488.671875,
+ "learning_rate": 9.292549071427291e-05,
+ "loss": 0.4668,
+ "step": 46850
+ },
+ {
+ "epoch": 0.24197584369082814,
+ "grad_norm": 21951.712890625,
+ "learning_rate": 9.29067467048526e-05,
+ "loss": 0.4749,
+ "step": 46900
+ },
+ {
+ "epoch": 0.24223381367344096,
+ "grad_norm": 20673.82421875,
+ "learning_rate": 9.288797979264176e-05,
+ "loss": 0.4687,
+ "step": 46950
+ },
+ {
+ "epoch": 0.24249178365605378,
+ "grad_norm": 18687.69140625,
+ "learning_rate": 9.286918998765776e-05,
+ "loss": 0.4731,
+ "step": 47000
+ },
+ {
+ "epoch": 0.2427497536386666,
+ "grad_norm": 18882.009765625,
+ "learning_rate": 9.285037729993027e-05,
+ "loss": 0.4699,
+ "step": 47050
+ },
+ {
+ "epoch": 0.24300772362127943,
+ "grad_norm": 22378.685546875,
+ "learning_rate": 9.283154173950112e-05,
+ "loss": 0.4678,
+ "step": 47100
+ },
+ {
+ "epoch": 0.24326569360389225,
+ "grad_norm": 19457.736328125,
+ "learning_rate": 9.281268331642439e-05,
+ "loss": 0.4665,
+ "step": 47150
+ },
+ {
+ "epoch": 0.24352366358650507,
+ "grad_norm": 19794.4296875,
+ "learning_rate": 9.279380204076631e-05,
+ "loss": 0.4683,
+ "step": 47200
+ },
+ {
+ "epoch": 0.2437816335691179,
+ "grad_norm": 18910.41796875,
+ "learning_rate": 9.277489792260536e-05,
+ "loss": 0.4683,
+ "step": 47250
+ },
+ {
+ "epoch": 0.2440396035517307,
+ "grad_norm": 21774.009765625,
+ "learning_rate": 9.275597097203216e-05,
+ "loss": 0.4729,
+ "step": 47300
+ },
+ {
+ "epoch": 0.24429757353434353,
+ "grad_norm": 21403.1796875,
+ "learning_rate": 9.273702119914962e-05,
+ "loss": 0.4681,
+ "step": 47350
+ },
+ {
+ "epoch": 0.24455554351695638,
+ "grad_norm": 20333.400390625,
+ "learning_rate": 9.271804861407269e-05,
+ "loss": 0.4713,
+ "step": 47400
+ },
+ {
+ "epoch": 0.2448135134995692,
+ "grad_norm": 22196.32421875,
+ "learning_rate": 9.269905322692862e-05,
+ "loss": 0.468,
+ "step": 47450
+ },
+ {
+ "epoch": 0.24507148348218202,
+ "grad_norm": 18356.623046875,
+ "learning_rate": 9.268003504785673e-05,
+ "loss": 0.4663,
+ "step": 47500
+ },
+ {
+ "epoch": 0.24532945346479484,
+ "grad_norm": 20337.546875,
+ "learning_rate": 9.266099408700859e-05,
+ "loss": 0.4657,
+ "step": 47550
+ },
+ {
+ "epoch": 0.24558742344740767,
+ "grad_norm": 20426.03515625,
+ "learning_rate": 9.264193035454789e-05,
+ "loss": 0.4677,
+ "step": 47600
+ },
+ {
+ "epoch": 0.2458453934300205,
+ "grad_norm": 20962.81640625,
+ "learning_rate": 9.262284386065047e-05,
+ "loss": 0.4759,
+ "step": 47650
+ },
+ {
+ "epoch": 0.2461033634126333,
+ "grad_norm": 20498.919921875,
+ "learning_rate": 9.260373461550435e-05,
+ "loss": 0.4647,
+ "step": 47700
+ },
+ {
+ "epoch": 0.24636133339524613,
+ "grad_norm": 21223.171875,
+ "learning_rate": 9.258460262930967e-05,
+ "loss": 0.4698,
+ "step": 47750
+ },
+ {
+ "epoch": 0.24661930337785895,
+ "grad_norm": 21146.671875,
+ "learning_rate": 9.256544791227871e-05,
+ "loss": 0.4727,
+ "step": 47800
+ },
+ {
+ "epoch": 0.24687727336047177,
+ "grad_norm": 19261.603515625,
+ "learning_rate": 9.254627047463588e-05,
+ "loss": 0.4734,
+ "step": 47850
+ },
+ {
+ "epoch": 0.2471352433430846,
+ "grad_norm": 21131.298828125,
+ "learning_rate": 9.252707032661774e-05,
+ "loss": 0.4686,
+ "step": 47900
+ },
+ {
+ "epoch": 0.24739321332569741,
+ "grad_norm": 22491.212890625,
+ "learning_rate": 9.250784747847294e-05,
+ "loss": 0.4701,
+ "step": 47950
+ },
+ {
+ "epoch": 0.24765118330831024,
+ "grad_norm": 20198.486328125,
+ "learning_rate": 9.248860194046228e-05,
+ "loss": 0.4657,
+ "step": 48000
+ },
+ {
+ "epoch": 0.24790915329092306,
+ "grad_norm": 21754.078125,
+ "learning_rate": 9.246933372285863e-05,
+ "loss": 0.4674,
+ "step": 48050
+ },
+ {
+ "epoch": 0.24816712327353588,
+ "grad_norm": 20948.244140625,
+ "learning_rate": 9.245004283594703e-05,
+ "loss": 0.4604,
+ "step": 48100
+ },
+ {
+ "epoch": 0.24842509325614873,
+ "grad_norm": 20916.3671875,
+ "learning_rate": 9.243072929002454e-05,
+ "loss": 0.4656,
+ "step": 48150
+ },
+ {
+ "epoch": 0.24868306323876155,
+ "grad_norm": 19935.021484375,
+ "learning_rate": 9.24113930954004e-05,
+ "loss": 0.4735,
+ "step": 48200
+ },
+ {
+ "epoch": 0.24894103322137437,
+ "grad_norm": 20075.96875,
+ "learning_rate": 9.239203426239585e-05,
+ "loss": 0.4679,
+ "step": 48250
+ },
+ {
+ "epoch": 0.2491990032039872,
+ "grad_norm": 20107.943359375,
+ "learning_rate": 9.23726528013443e-05,
+ "loss": 0.4773,
+ "step": 48300
+ },
+ {
+ "epoch": 0.2494569731866,
+ "grad_norm": 20341.1171875,
+ "learning_rate": 9.235324872259119e-05,
+ "loss": 0.4699,
+ "step": 48350
+ },
+ {
+ "epoch": 0.24971494316921283,
+ "grad_norm": 21787.4296875,
+ "learning_rate": 9.233382203649401e-05,
+ "loss": 0.4665,
+ "step": 48400
+ },
+ {
+ "epoch": 0.24997291315182565,
+ "grad_norm": 17707.583984375,
+ "learning_rate": 9.231437275342239e-05,
+ "loss": 0.4678,
+ "step": 48450
+ },
+ {
+ "epoch": 0.2502308831344385,
+ "grad_norm": 24467.810546875,
+ "learning_rate": 9.229490088375797e-05,
+ "loss": 0.466,
+ "step": 48500
+ },
+ {
+ "epoch": 0.2504888531170513,
+ "grad_norm": 20794.73828125,
+ "learning_rate": 9.227540643789446e-05,
+ "loss": 0.4711,
+ "step": 48550
+ },
+ {
+ "epoch": 0.2507468230996641,
+ "grad_norm": 20147.099609375,
+ "learning_rate": 9.225588942623758e-05,
+ "loss": 0.4689,
+ "step": 48600
+ },
+ {
+ "epoch": 0.25100479308227697,
+ "grad_norm": 20704.037109375,
+ "learning_rate": 9.223634985920517e-05,
+ "loss": 0.4687,
+ "step": 48650
+ },
+ {
+ "epoch": 0.25126276306488976,
+ "grad_norm": 19472.21875,
+ "learning_rate": 9.221678774722707e-05,
+ "loss": 0.4636,
+ "step": 48700
+ },
+ {
+ "epoch": 0.2515207330475026,
+ "grad_norm": 21352.755859375,
+ "learning_rate": 9.219720310074515e-05,
+ "loss": 0.4671,
+ "step": 48750
+ },
+ {
+ "epoch": 0.2517787030301154,
+ "grad_norm": 20956.146484375,
+ "learning_rate": 9.21775959302133e-05,
+ "loss": 0.4703,
+ "step": 48800
+ },
+ {
+ "epoch": 0.25203667301272825,
+ "grad_norm": 26295.541015625,
+ "learning_rate": 9.215796624609749e-05,
+ "loss": 0.4742,
+ "step": 48850
+ },
+ {
+ "epoch": 0.25229464299534105,
+ "grad_norm": 19862.15625,
+ "learning_rate": 9.213831405887564e-05,
+ "loss": 0.468,
+ "step": 48900
+ },
+ {
+ "epoch": 0.2525526129779539,
+ "grad_norm": 21760.404296875,
+ "learning_rate": 9.211863937903769e-05,
+ "loss": 0.4728,
+ "step": 48950
+ },
+ {
+ "epoch": 0.2528105829605667,
+ "grad_norm": 22488.1484375,
+ "learning_rate": 9.209894221708564e-05,
+ "loss": 0.4627,
+ "step": 49000
+ },
+ {
+ "epoch": 0.25306855294317954,
+ "grad_norm": 20244.5,
+ "learning_rate": 9.20792225835334e-05,
+ "loss": 0.4706,
+ "step": 49050
+ },
+ {
+ "epoch": 0.25332652292579233,
+ "grad_norm": 22642.44140625,
+ "learning_rate": 9.205948048890698e-05,
+ "loss": 0.4708,
+ "step": 49100
+ },
+ {
+ "epoch": 0.2535844929084052,
+ "grad_norm": 23121.501953125,
+ "learning_rate": 9.203971594374432e-05,
+ "loss": 0.4723,
+ "step": 49150
+ },
+ {
+ "epoch": 0.25384246289101803,
+ "grad_norm": 19514.916015625,
+ "learning_rate": 9.201992895859532e-05,
+ "loss": 0.4692,
+ "step": 49200
+ },
+ {
+ "epoch": 0.2541004328736308,
+ "grad_norm": 19467.662109375,
+ "learning_rate": 9.200011954402193e-05,
+ "loss": 0.4719,
+ "step": 49250
+ },
+ {
+ "epoch": 0.25435840285624367,
+ "grad_norm": 20737.7578125,
+ "learning_rate": 9.198028771059799e-05,
+ "loss": 0.4643,
+ "step": 49300
+ },
+ {
+ "epoch": 0.25461637283885646,
+ "grad_norm": 20229.341796875,
+ "learning_rate": 9.196043346890939e-05,
+ "loss": 0.462,
+ "step": 49350
+ },
+ {
+ "epoch": 0.2548743428214693,
+ "grad_norm": 23094.35546875,
+ "learning_rate": 9.194055682955392e-05,
+ "loss": 0.4701,
+ "step": 49400
+ },
+ {
+ "epoch": 0.2551323128040821,
+ "grad_norm": 21099.541015625,
+ "learning_rate": 9.192065780314132e-05,
+ "loss": 0.466,
+ "step": 49450
+ },
+ {
+ "epoch": 0.25539028278669496,
+ "grad_norm": 21500.302734375,
+ "learning_rate": 9.190073640029335e-05,
+ "loss": 0.4703,
+ "step": 49500
+ },
+ {
+ "epoch": 0.25564825276930775,
+ "grad_norm": 24272.228515625,
+ "learning_rate": 9.188079263164366e-05,
+ "loss": 0.4672,
+ "step": 49550
+ },
+ {
+ "epoch": 0.2559062227519206,
+ "grad_norm": 21129.013671875,
+ "learning_rate": 9.186082650783783e-05,
+ "loss": 0.4715,
+ "step": 49600
+ },
+ {
+ "epoch": 0.2561641927345334,
+ "grad_norm": 20696.32421875,
+ "learning_rate": 9.184083803953339e-05,
+ "loss": 0.4646,
+ "step": 49650
+ },
+ {
+ "epoch": 0.25642216271714624,
+ "grad_norm": 20142.7890625,
+ "learning_rate": 9.18208272373998e-05,
+ "loss": 0.4627,
+ "step": 49700
+ },
+ {
+ "epoch": 0.25668013269975903,
+ "grad_norm": 18810.43359375,
+ "learning_rate": 9.180079411211847e-05,
+ "loss": 0.4659,
+ "step": 49750
+ },
+ {
+ "epoch": 0.2569381026823719,
+ "grad_norm": 23121.84765625,
+ "learning_rate": 9.178073867438264e-05,
+ "loss": 0.4683,
+ "step": 49800
+ },
+ {
+ "epoch": 0.2571960726649847,
+ "grad_norm": 20432.021484375,
+ "learning_rate": 9.176066093489755e-05,
+ "loss": 0.4704,
+ "step": 49850
+ },
+ {
+ "epoch": 0.2574540426475975,
+ "grad_norm": 22056.09765625,
+ "learning_rate": 9.17405609043803e-05,
+ "loss": 0.4753,
+ "step": 49900
+ },
+ {
+ "epoch": 0.2577120126302104,
+ "grad_norm": 21094.931640625,
+ "learning_rate": 9.17204385935599e-05,
+ "loss": 0.4648,
+ "step": 49950
+ },
+ {
+ "epoch": 0.25796998261282317,
+ "grad_norm": 20127.525390625,
+ "learning_rate": 9.170029401317725e-05,
+ "loss": 0.4646,
+ "step": 50000
+ },
+ {
+ "epoch": 0.25796998261282317,
+ "eval_loss": 0.4567689299583435,
+ "eval_runtime": 3268.0543,
+ "eval_samples_per_second": 948.919,
+ "eval_steps_per_second": 1.853,
+ "step": 50000
+ },
+ {
+ "epoch": 0.258227952595436,
+ "grad_norm": 20947.306640625,
+ "learning_rate": 9.168012717398516e-05,
+ "loss": 0.4688,
+ "step": 50050
+ },
+ {
+ "epoch": 0.2584859225780488,
+ "grad_norm": 23591.646484375,
+ "learning_rate": 9.165993808674823e-05,
+ "loss": 0.4683,
+ "step": 50100
+ },
+ {
+ "epoch": 0.25874389256066166,
+ "grad_norm": 21227.677734375,
+ "learning_rate": 9.163972676224306e-05,
+ "loss": 0.4671,
+ "step": 50150
+ },
+ {
+ "epoch": 0.25900186254327445,
+ "grad_norm": 20084.953125,
+ "learning_rate": 9.161949321125807e-05,
+ "loss": 0.4598,
+ "step": 50200
+ },
+ {
+ "epoch": 0.2592598325258873,
+ "grad_norm": 21139.5,
+ "learning_rate": 9.159923744459349e-05,
+ "loss": 0.4707,
+ "step": 50250
+ },
+ {
+ "epoch": 0.2595178025085001,
+ "grad_norm": 20410.794921875,
+ "learning_rate": 9.15789594730615e-05,
+ "loss": 0.4675,
+ "step": 50300
+ },
+ {
+ "epoch": 0.25977577249111294,
+ "grad_norm": 20010.328125,
+ "learning_rate": 9.155865930748608e-05,
+ "loss": 0.4599,
+ "step": 50350
+ },
+ {
+ "epoch": 0.26003374247372574,
+ "grad_norm": 23502.890625,
+ "learning_rate": 9.153833695870304e-05,
+ "loss": 0.4664,
+ "step": 50400
+ },
+ {
+ "epoch": 0.2602917124563386,
+ "grad_norm": 20373.498046875,
+ "learning_rate": 9.151799243756008e-05,
+ "loss": 0.4655,
+ "step": 50450
+ },
+ {
+ "epoch": 0.2605496824389514,
+ "grad_norm": 21093.669921875,
+ "learning_rate": 9.149762575491671e-05,
+ "loss": 0.4623,
+ "step": 50500
+ },
+ {
+ "epoch": 0.26080765242156423,
+ "grad_norm": 22206.87890625,
+ "learning_rate": 9.147723692164427e-05,
+ "loss": 0.4687,
+ "step": 50550
+ },
+ {
+ "epoch": 0.261065622404177,
+ "grad_norm": 23264.875,
+ "learning_rate": 9.145682594862593e-05,
+ "loss": 0.4705,
+ "step": 50600
+ },
+ {
+ "epoch": 0.26132359238678987,
+ "grad_norm": 22029.849609375,
+ "learning_rate": 9.143639284675664e-05,
+ "loss": 0.4673,
+ "step": 50650
+ },
+ {
+ "epoch": 0.2615815623694027,
+ "grad_norm": 23016.955078125,
+ "learning_rate": 9.141593762694323e-05,
+ "loss": 0.4663,
+ "step": 50700
+ },
+ {
+ "epoch": 0.2618395323520155,
+ "grad_norm": 21590.80859375,
+ "learning_rate": 9.139546030010427e-05,
+ "loss": 0.4684,
+ "step": 50750
+ },
+ {
+ "epoch": 0.26209750233462836,
+ "grad_norm": 19839.986328125,
+ "learning_rate": 9.13749608771702e-05,
+ "loss": 0.4682,
+ "step": 50800
+ },
+ {
+ "epoch": 0.26235547231724116,
+ "grad_norm": 17922.802734375,
+ "learning_rate": 9.135443936908318e-05,
+ "loss": 0.4601,
+ "step": 50850
+ },
+ {
+ "epoch": 0.262613442299854,
+ "grad_norm": 21141.119140625,
+ "learning_rate": 9.133389578679723e-05,
+ "loss": 0.467,
+ "step": 50900
+ },
+ {
+ "epoch": 0.2628714122824668,
+ "grad_norm": 21858.158203125,
+ "learning_rate": 9.131333014127806e-05,
+ "loss": 0.4663,
+ "step": 50950
+ },
+ {
+ "epoch": 0.26312938226507965,
+ "grad_norm": 21516.46875,
+ "learning_rate": 9.129274244350326e-05,
+ "loss": 0.4656,
+ "step": 51000
+ },
+ {
+ "epoch": 0.26338735224769244,
+ "grad_norm": 21403.263671875,
+ "learning_rate": 9.127213270446213e-05,
+ "loss": 0.4717,
+ "step": 51050
+ },
+ {
+ "epoch": 0.2636453222303053,
+ "grad_norm": 20405.4296875,
+ "learning_rate": 9.125150093515575e-05,
+ "loss": 0.4656,
+ "step": 51100
+ },
+ {
+ "epoch": 0.2639032922129181,
+ "grad_norm": 21057.57421875,
+ "learning_rate": 9.123084714659698e-05,
+ "loss": 0.4655,
+ "step": 51150
+ },
+ {
+ "epoch": 0.26416126219553093,
+ "grad_norm": 19891.15234375,
+ "learning_rate": 9.121017134981036e-05,
+ "loss": 0.4706,
+ "step": 51200
+ },
+ {
+ "epoch": 0.2644192321781437,
+ "grad_norm": 20441.30078125,
+ "learning_rate": 9.118947355583228e-05,
+ "loss": 0.4707,
+ "step": 51250
+ },
+ {
+ "epoch": 0.2646772021607566,
+ "grad_norm": 22182.67578125,
+ "learning_rate": 9.11687537757108e-05,
+ "loss": 0.4633,
+ "step": 51300
+ },
+ {
+ "epoch": 0.2649351721433694,
+ "grad_norm": 18211.728515625,
+ "learning_rate": 9.114801202050574e-05,
+ "loss": 0.4677,
+ "step": 51350
+ },
+ {
+ "epoch": 0.2651931421259822,
+ "grad_norm": 20691.697265625,
+ "learning_rate": 9.112724830128865e-05,
+ "loss": 0.4634,
+ "step": 51400
+ },
+ {
+ "epoch": 0.26545111210859507,
+ "grad_norm": 19717.75390625,
+ "learning_rate": 9.110646262914279e-05,
+ "loss": 0.4647,
+ "step": 51450
+ },
+ {
+ "epoch": 0.26570908209120786,
+ "grad_norm": 19860.55078125,
+ "learning_rate": 9.108565501516318e-05,
+ "loss": 0.4665,
+ "step": 51500
+ },
+ {
+ "epoch": 0.2659670520738207,
+ "grad_norm": 20122.984375,
+ "learning_rate": 9.106482547045648e-05,
+ "loss": 0.4663,
+ "step": 51550
+ },
+ {
+ "epoch": 0.2662250220564335,
+ "grad_norm": 21214.724609375,
+ "learning_rate": 9.104397400614112e-05,
+ "loss": 0.4676,
+ "step": 51600
+ },
+ {
+ "epoch": 0.26648299203904635,
+ "grad_norm": 24545.041015625,
+ "learning_rate": 9.102310063334722e-05,
+ "loss": 0.4705,
+ "step": 51650
+ },
+ {
+ "epoch": 0.26674096202165914,
+ "grad_norm": 22479.380859375,
+ "learning_rate": 9.100220536321655e-05,
+ "loss": 0.4616,
+ "step": 51700
+ },
+ {
+ "epoch": 0.266998932004272,
+ "grad_norm": 20262.27734375,
+ "learning_rate": 9.098128820690264e-05,
+ "loss": 0.4569,
+ "step": 51750
+ },
+ {
+ "epoch": 0.2672569019868848,
+ "grad_norm": 20906.880859375,
+ "learning_rate": 9.096034917557062e-05,
+ "loss": 0.468,
+ "step": 51800
+ },
+ {
+ "epoch": 0.26751487196949764,
+ "grad_norm": 20986.455078125,
+ "learning_rate": 9.093938828039737e-05,
+ "loss": 0.4697,
+ "step": 51850
+ },
+ {
+ "epoch": 0.26777284195211043,
+ "grad_norm": 22425.681640625,
+ "learning_rate": 9.09184055325714e-05,
+ "loss": 0.4692,
+ "step": 51900
+ },
+ {
+ "epoch": 0.2680308119347233,
+ "grad_norm": 21817.744140625,
+ "learning_rate": 9.089740094329288e-05,
+ "loss": 0.4726,
+ "step": 51950
+ },
+ {
+ "epoch": 0.26828878191733607,
+ "grad_norm": 20527.017578125,
+ "learning_rate": 9.087637452377369e-05,
+ "loss": 0.459,
+ "step": 52000
+ },
+ {
+ "epoch": 0.2685467518999489,
+ "grad_norm": 24486.521484375,
+ "learning_rate": 9.08553262852373e-05,
+ "loss": 0.4624,
+ "step": 52050
+ },
+ {
+ "epoch": 0.26880472188256177,
+ "grad_norm": 20964.537109375,
+ "learning_rate": 9.083425623891885e-05,
+ "loss": 0.4657,
+ "step": 52100
+ },
+ {
+ "epoch": 0.26906269186517456,
+ "grad_norm": 20966.478515625,
+ "learning_rate": 9.081316439606513e-05,
+ "loss": 0.4723,
+ "step": 52150
+ },
+ {
+ "epoch": 0.2693206618477874,
+ "grad_norm": 20067.330078125,
+ "learning_rate": 9.079205076793457e-05,
+ "loss": 0.4644,
+ "step": 52200
+ },
+ {
+ "epoch": 0.2695786318304002,
+ "grad_norm": 21526.298828125,
+ "learning_rate": 9.077091536579719e-05,
+ "loss": 0.4602,
+ "step": 52250
+ },
+ {
+ "epoch": 0.26983660181301306,
+ "grad_norm": 20446.767578125,
+ "learning_rate": 9.074975820093468e-05,
+ "loss": 0.4671,
+ "step": 52300
+ },
+ {
+ "epoch": 0.27009457179562585,
+ "grad_norm": 19936.599609375,
+ "learning_rate": 9.072857928464029e-05,
+ "loss": 0.4626,
+ "step": 52350
+ },
+ {
+ "epoch": 0.2703525417782387,
+ "grad_norm": 21716.60546875,
+ "learning_rate": 9.070737862821896e-05,
+ "loss": 0.4642,
+ "step": 52400
+ },
+ {
+ "epoch": 0.2706105117608515,
+ "grad_norm": 17588.40625,
+ "learning_rate": 9.068615624298717e-05,
+ "loss": 0.4595,
+ "step": 52450
+ },
+ {
+ "epoch": 0.27086848174346434,
+ "grad_norm": 21721.138671875,
+ "learning_rate": 9.066491214027302e-05,
+ "loss": 0.4639,
+ "step": 52500
+ },
+ {
+ "epoch": 0.27112645172607713,
+ "grad_norm": 19480.875,
+ "learning_rate": 9.06436463314162e-05,
+ "loss": 0.4654,
+ "step": 52550
+ },
+ {
+ "epoch": 0.27138442170869,
+ "grad_norm": 22658.076171875,
+ "learning_rate": 9.062235882776797e-05,
+ "loss": 0.4653,
+ "step": 52600
+ },
+ {
+ "epoch": 0.2716423916913028,
+ "grad_norm": 22396.4140625,
+ "learning_rate": 9.060104964069121e-05,
+ "loss": 0.4634,
+ "step": 52650
+ },
+ {
+ "epoch": 0.2719003616739156,
+ "grad_norm": 22354.28125,
+ "learning_rate": 9.057971878156036e-05,
+ "loss": 0.4626,
+ "step": 52700
+ },
+ {
+ "epoch": 0.2721583316565285,
+ "grad_norm": 19845.22265625,
+ "learning_rate": 9.05583662617614e-05,
+ "loss": 0.4666,
+ "step": 52750
+ },
+ {
+ "epoch": 0.27241630163914127,
+ "grad_norm": 19933.978515625,
+ "learning_rate": 9.053699209269188e-05,
+ "loss": 0.4601,
+ "step": 52800
+ },
+ {
+ "epoch": 0.2726742716217541,
+ "grad_norm": 21288.86328125,
+ "learning_rate": 9.051559628576094e-05,
+ "loss": 0.4622,
+ "step": 52850
+ },
+ {
+ "epoch": 0.2729322416043669,
+ "grad_norm": 20604.05078125,
+ "learning_rate": 9.049417885238927e-05,
+ "loss": 0.4618,
+ "step": 52900
+ },
+ {
+ "epoch": 0.27319021158697976,
+ "grad_norm": 18641.544921875,
+ "learning_rate": 9.047273980400903e-05,
+ "loss": 0.46,
+ "step": 52950
+ },
+ {
+ "epoch": 0.27344818156959255,
+ "grad_norm": 22482.8125,
+ "learning_rate": 9.045127915206398e-05,
+ "loss": 0.4673,
+ "step": 53000
+ },
+ {
+ "epoch": 0.2737061515522054,
+ "grad_norm": 20967.9375,
+ "learning_rate": 9.042979690800943e-05,
+ "loss": 0.4607,
+ "step": 53050
+ },
+ {
+ "epoch": 0.2739641215348182,
+ "grad_norm": 22371.90234375,
+ "learning_rate": 9.040829308331216e-05,
+ "loss": 0.4624,
+ "step": 53100
+ },
+ {
+ "epoch": 0.27422209151743104,
+ "grad_norm": 19802.947265625,
+ "learning_rate": 9.03867676894505e-05,
+ "loss": 0.4542,
+ "step": 53150
+ },
+ {
+ "epoch": 0.27448006150004384,
+ "grad_norm": 21255.974609375,
+ "learning_rate": 9.03652207379143e-05,
+ "loss": 0.4636,
+ "step": 53200
+ },
+ {
+ "epoch": 0.2747380314826567,
+ "grad_norm": 21687.16796875,
+ "learning_rate": 9.034365224020489e-05,
+ "loss": 0.4626,
+ "step": 53250
+ },
+ {
+ "epoch": 0.2749960014652695,
+ "grad_norm": 21386.275390625,
+ "learning_rate": 9.032206220783512e-05,
+ "loss": 0.4659,
+ "step": 53300
+ },
+ {
+ "epoch": 0.27525397144788233,
+ "grad_norm": 19433.888671875,
+ "learning_rate": 9.030045065232935e-05,
+ "loss": 0.4585,
+ "step": 53350
+ },
+ {
+ "epoch": 0.2755119414304951,
+ "grad_norm": 20615.021484375,
+ "learning_rate": 9.027881758522339e-05,
+ "loss": 0.4619,
+ "step": 53400
+ },
+ {
+ "epoch": 0.27576991141310797,
+ "grad_norm": 20498.369140625,
+ "learning_rate": 9.025716301806454e-05,
+ "loss": 0.4658,
+ "step": 53450
+ },
+ {
+ "epoch": 0.2760278813957208,
+ "grad_norm": 20348.955078125,
+ "learning_rate": 9.023548696241162e-05,
+ "loss": 0.4637,
+ "step": 53500
+ },
+ {
+ "epoch": 0.2762858513783336,
+ "grad_norm": 18524.3203125,
+ "learning_rate": 9.021378942983487e-05,
+ "loss": 0.4636,
+ "step": 53550
+ },
+ {
+ "epoch": 0.27654382136094646,
+ "grad_norm": 20778.064453125,
+ "learning_rate": 9.019207043191602e-05,
+ "loss": 0.4604,
+ "step": 53600
+ },
+ {
+ "epoch": 0.27680179134355926,
+ "grad_norm": 19481.369140625,
+ "learning_rate": 9.017032998024823e-05,
+ "loss": 0.4629,
+ "step": 53650
+ },
+ {
+ "epoch": 0.2770597613261721,
+ "grad_norm": 20873.8515625,
+ "learning_rate": 9.014856808643617e-05,
+ "loss": 0.4647,
+ "step": 53700
+ },
+ {
+ "epoch": 0.2773177313087849,
+ "grad_norm": 21859.05078125,
+ "learning_rate": 9.012678476209591e-05,
+ "loss": 0.4621,
+ "step": 53750
+ },
+ {
+ "epoch": 0.27757570129139775,
+ "grad_norm": 20832.587890625,
+ "learning_rate": 9.010498001885492e-05,
+ "loss": 0.463,
+ "step": 53800
+ },
+ {
+ "epoch": 0.27783367127401054,
+ "grad_norm": 18435.703125,
+ "learning_rate": 9.00831538683522e-05,
+ "loss": 0.466,
+ "step": 53850
+ },
+ {
+ "epoch": 0.2780916412566234,
+ "grad_norm": 21496.61328125,
+ "learning_rate": 9.006130632223811e-05,
+ "loss": 0.4611,
+ "step": 53900
+ },
+ {
+ "epoch": 0.2783496112392362,
+ "grad_norm": 21796.873046875,
+ "learning_rate": 9.003943739217444e-05,
+ "loss": 0.4587,
+ "step": 53950
+ },
+ {
+ "epoch": 0.27860758122184903,
+ "grad_norm": 21053.099609375,
+ "learning_rate": 9.001754708983443e-05,
+ "loss": 0.4659,
+ "step": 54000
+ },
+ {
+ "epoch": 0.2788655512044618,
+ "grad_norm": 20332.98828125,
+ "learning_rate": 8.999563542690266e-05,
+ "loss": 0.4586,
+ "step": 54050
+ },
+ {
+ "epoch": 0.2791235211870747,
+ "grad_norm": 19829.93359375,
+ "learning_rate": 8.997370241507516e-05,
+ "loss": 0.4608,
+ "step": 54100
+ },
+ {
+ "epoch": 0.27938149116968747,
+ "grad_norm": 21215.3515625,
+ "learning_rate": 8.995174806605937e-05,
+ "loss": 0.4672,
+ "step": 54150
+ },
+ {
+ "epoch": 0.2796394611523003,
+ "grad_norm": 19068.890625,
+ "learning_rate": 8.992977239157408e-05,
+ "loss": 0.4637,
+ "step": 54200
+ },
+ {
+ "epoch": 0.27989743113491317,
+ "grad_norm": 20632.857421875,
+ "learning_rate": 8.99077754033495e-05,
+ "loss": 0.4615,
+ "step": 54250
+ },
+ {
+ "epoch": 0.28015540111752596,
+ "grad_norm": 20244.943359375,
+ "learning_rate": 8.988575711312714e-05,
+ "loss": 0.4665,
+ "step": 54300
+ },
+ {
+ "epoch": 0.2804133711001388,
+ "grad_norm": 21873.34375,
+ "learning_rate": 8.986371753266001e-05,
+ "loss": 0.4636,
+ "step": 54350
+ },
+ {
+ "epoch": 0.2806713410827516,
+ "grad_norm": 18075.001953125,
+ "learning_rate": 8.984165667371236e-05,
+ "loss": 0.4626,
+ "step": 54400
+ },
+ {
+ "epoch": 0.28092931106536445,
+ "grad_norm": 19815.0546875,
+ "learning_rate": 8.981957454805987e-05,
+ "loss": 0.4535,
+ "step": 54450
+ },
+ {
+ "epoch": 0.28118728104797724,
+ "grad_norm": 22713.48046875,
+ "learning_rate": 8.979747116748955e-05,
+ "loss": 0.4592,
+ "step": 54500
+ },
+ {
+ "epoch": 0.2814452510305901,
+ "grad_norm": 23360.1953125,
+ "learning_rate": 8.977534654379976e-05,
+ "loss": 0.4646,
+ "step": 54550
+ },
+ {
+ "epoch": 0.2817032210132029,
+ "grad_norm": 21626.36328125,
+ "learning_rate": 8.975320068880018e-05,
+ "loss": 0.4644,
+ "step": 54600
+ },
+ {
+ "epoch": 0.28196119099581574,
+ "grad_norm": 20061.873046875,
+ "learning_rate": 8.973103361431184e-05,
+ "loss": 0.4674,
+ "step": 54650
+ },
+ {
+ "epoch": 0.28221916097842853,
+ "grad_norm": 21295.0625,
+ "learning_rate": 8.970884533216713e-05,
+ "loss": 0.4674,
+ "step": 54700
+ },
+ {
+ "epoch": 0.2824771309610414,
+ "grad_norm": 19434.23828125,
+ "learning_rate": 8.968663585420967e-05,
+ "loss": 0.46,
+ "step": 54750
+ },
+ {
+ "epoch": 0.28273510094365417,
+ "grad_norm": 23654.849609375,
+ "learning_rate": 8.966440519229449e-05,
+ "loss": 0.4649,
+ "step": 54800
+ },
+ {
+ "epoch": 0.282993070926267,
+ "grad_norm": 22763.603515625,
+ "learning_rate": 8.964215335828787e-05,
+ "loss": 0.4578,
+ "step": 54850
+ },
+ {
+ "epoch": 0.28325104090887987,
+ "grad_norm": 23262.849609375,
+ "learning_rate": 8.961988036406741e-05,
+ "loss": 0.4674,
+ "step": 54900
+ },
+ {
+ "epoch": 0.28350901089149266,
+ "grad_norm": 20148.380859375,
+ "learning_rate": 8.959758622152201e-05,
+ "loss": 0.4642,
+ "step": 54950
+ },
+ {
+ "epoch": 0.2837669808741055,
+ "grad_norm": 22515.548828125,
+ "learning_rate": 8.957527094255186e-05,
+ "loss": 0.4697,
+ "step": 55000
+ },
+ {
+ "epoch": 0.2837669808741055,
+ "eval_loss": 0.4508056044578552,
+ "eval_runtime": 3347.9938,
+ "eval_samples_per_second": 926.262,
+ "eval_steps_per_second": 1.809,
+ "step": 55000
+ },
+ {
+ "epoch": 0.2840249508567183,
+ "grad_norm": 21158.09375,
+ "learning_rate": 8.95529345390684e-05,
+ "loss": 0.4617,
+ "step": 55050
+ },
+ {
+ "epoch": 0.28428292083933115,
+ "grad_norm": 20892.517578125,
+ "learning_rate": 8.953057702299437e-05,
+ "loss": 0.4612,
+ "step": 55100
+ },
+ {
+ "epoch": 0.28454089082194395,
+ "grad_norm": 21489.740234375,
+ "learning_rate": 8.950819840626381e-05,
+ "loss": 0.4578,
+ "step": 55150
+ },
+ {
+ "epoch": 0.2847988608045568,
+ "grad_norm": 20703.072265625,
+ "learning_rate": 8.948579870082197e-05,
+ "loss": 0.4632,
+ "step": 55200
+ },
+ {
+ "epoch": 0.2850568307871696,
+ "grad_norm": 21731.775390625,
+ "learning_rate": 8.946337791862537e-05,
+ "loss": 0.4621,
+ "step": 55250
+ },
+ {
+ "epoch": 0.28531480076978244,
+ "grad_norm": 24507.076171875,
+ "learning_rate": 8.94409360716418e-05,
+ "loss": 0.4542,
+ "step": 55300
+ },
+ {
+ "epoch": 0.28557277075239523,
+ "grad_norm": 20686.79296875,
+ "learning_rate": 8.94184731718503e-05,
+ "loss": 0.4575,
+ "step": 55350
+ },
+ {
+ "epoch": 0.2858307407350081,
+ "grad_norm": 20055.396484375,
+ "learning_rate": 8.93959892312411e-05,
+ "loss": 0.4595,
+ "step": 55400
+ },
+ {
+ "epoch": 0.2860887107176209,
+ "grad_norm": 21203.28515625,
+ "learning_rate": 8.93734842618157e-05,
+ "loss": 0.457,
+ "step": 55450
+ },
+ {
+ "epoch": 0.2863466807002337,
+ "grad_norm": 21738.6328125,
+ "learning_rate": 8.935095827558684e-05,
+ "loss": 0.4639,
+ "step": 55500
+ },
+ {
+ "epoch": 0.2866046506828465,
+ "grad_norm": 21593.056640625,
+ "learning_rate": 8.932841128457844e-05,
+ "loss": 0.4566,
+ "step": 55550
+ },
+ {
+ "epoch": 0.28686262066545937,
+ "grad_norm": 20362.564453125,
+ "learning_rate": 8.930584330082564e-05,
+ "loss": 0.4613,
+ "step": 55600
+ },
+ {
+ "epoch": 0.2871205906480722,
+ "grad_norm": 20415.390625,
+ "learning_rate": 8.928325433637482e-05,
+ "loss": 0.4591,
+ "step": 55650
+ },
+ {
+ "epoch": 0.287378560630685,
+ "grad_norm": 21615.1953125,
+ "learning_rate": 8.926064440328348e-05,
+ "loss": 0.4645,
+ "step": 55700
+ },
+ {
+ "epoch": 0.28763653061329786,
+ "grad_norm": 19537.873046875,
+ "learning_rate": 8.92380135136204e-05,
+ "loss": 0.4595,
+ "step": 55750
+ },
+ {
+ "epoch": 0.28789450059591065,
+ "grad_norm": 21288.21484375,
+ "learning_rate": 8.921536167946552e-05,
+ "loss": 0.4565,
+ "step": 55800
+ },
+ {
+ "epoch": 0.2881524705785235,
+ "grad_norm": 25019.783203125,
+ "learning_rate": 8.919268891290992e-05,
+ "loss": 0.4635,
+ "step": 55850
+ },
+ {
+ "epoch": 0.2884104405611363,
+ "grad_norm": 23099.5625,
+ "learning_rate": 8.916999522605592e-05,
+ "loss": 0.4561,
+ "step": 55900
+ },
+ {
+ "epoch": 0.28866841054374914,
+ "grad_norm": 22477.849609375,
+ "learning_rate": 8.914728063101694e-05,
+ "loss": 0.458,
+ "step": 55950
+ },
+ {
+ "epoch": 0.28892638052636194,
+ "grad_norm": 19823.103515625,
+ "learning_rate": 8.91245451399176e-05,
+ "loss": 0.457,
+ "step": 56000
+ },
+ {
+ "epoch": 0.2891843505089748,
+ "grad_norm": 20293.353515625,
+ "learning_rate": 8.910178876489368e-05,
+ "loss": 0.4614,
+ "step": 56050
+ },
+ {
+ "epoch": 0.2894423204915876,
+ "grad_norm": 19020.892578125,
+ "learning_rate": 8.907901151809205e-05,
+ "loss": 0.4597,
+ "step": 56100
+ },
+ {
+ "epoch": 0.28970029047420043,
+ "grad_norm": 20133.603515625,
+ "learning_rate": 8.905621341167082e-05,
+ "loss": 0.4577,
+ "step": 56150
+ },
+ {
+ "epoch": 0.2899582604568132,
+ "grad_norm": 21008.95703125,
+ "learning_rate": 8.903339445779915e-05,
+ "loss": 0.4596,
+ "step": 56200
+ },
+ {
+ "epoch": 0.29021623043942607,
+ "grad_norm": 21339.892578125,
+ "learning_rate": 8.901055466865735e-05,
+ "loss": 0.4631,
+ "step": 56250
+ },
+ {
+ "epoch": 0.29047420042203886,
+ "grad_norm": 20088.455078125,
+ "learning_rate": 8.898769405643686e-05,
+ "loss": 0.4571,
+ "step": 56300
+ },
+ {
+ "epoch": 0.2907321704046517,
+ "grad_norm": 21779.341796875,
+ "learning_rate": 8.896481263334023e-05,
+ "loss": 0.4541,
+ "step": 56350
+ },
+ {
+ "epoch": 0.29099014038726456,
+ "grad_norm": 24433.103515625,
+ "learning_rate": 8.894191041158113e-05,
+ "loss": 0.4627,
+ "step": 56400
+ },
+ {
+ "epoch": 0.29124811036987736,
+ "grad_norm": 22214.70703125,
+ "learning_rate": 8.891898740338432e-05,
+ "loss": 0.4585,
+ "step": 56450
+ },
+ {
+ "epoch": 0.2915060803524902,
+ "grad_norm": 20558.955078125,
+ "learning_rate": 8.889604362098567e-05,
+ "loss": 0.4547,
+ "step": 56500
+ },
+ {
+ "epoch": 0.291764050335103,
+ "grad_norm": 22438.3828125,
+ "learning_rate": 8.88730790766321e-05,
+ "loss": 0.4581,
+ "step": 56550
+ },
+ {
+ "epoch": 0.29202202031771585,
+ "grad_norm": 22429.658203125,
+ "learning_rate": 8.885009378258164e-05,
+ "loss": 0.4556,
+ "step": 56600
+ },
+ {
+ "epoch": 0.29227999030032864,
+ "grad_norm": 18076.814453125,
+ "learning_rate": 8.882708775110342e-05,
+ "loss": 0.4571,
+ "step": 56650
+ },
+ {
+ "epoch": 0.2925379602829415,
+ "grad_norm": 19816.873046875,
+ "learning_rate": 8.88040609944776e-05,
+ "loss": 0.4584,
+ "step": 56700
+ },
+ {
+ "epoch": 0.2927959302655543,
+ "grad_norm": 20448.5234375,
+ "learning_rate": 8.878101352499542e-05,
+ "loss": 0.4575,
+ "step": 56750
+ },
+ {
+ "epoch": 0.29305390024816713,
+ "grad_norm": 19950.4609375,
+ "learning_rate": 8.875794535495915e-05,
+ "loss": 0.4558,
+ "step": 56800
+ },
+ {
+ "epoch": 0.2933118702307799,
+ "grad_norm": 20185.0625,
+ "learning_rate": 8.873485649668218e-05,
+ "loss": 0.4523,
+ "step": 56850
+ },
+ {
+ "epoch": 0.2935698402133928,
+ "grad_norm": 22338.080078125,
+ "learning_rate": 8.871174696248888e-05,
+ "loss": 0.4648,
+ "step": 56900
+ },
+ {
+ "epoch": 0.29382781019600557,
+ "grad_norm": 22531.541015625,
+ "learning_rate": 8.868861676471463e-05,
+ "loss": 0.4628,
+ "step": 56950
+ },
+ {
+ "epoch": 0.2940857801786184,
+ "grad_norm": 19558.10546875,
+ "learning_rate": 8.866546591570592e-05,
+ "loss": 0.4565,
+ "step": 57000
+ },
+ {
+ "epoch": 0.29434375016123127,
+ "grad_norm": 20166.33203125,
+ "learning_rate": 8.864229442782023e-05,
+ "loss": 0.4527,
+ "step": 57050
+ },
+ {
+ "epoch": 0.29460172014384406,
+ "grad_norm": 20262.185546875,
+ "learning_rate": 8.861910231342603e-05,
+ "loss": 0.4575,
+ "step": 57100
+ },
+ {
+ "epoch": 0.2948596901264569,
+ "grad_norm": 19107.080078125,
+ "learning_rate": 8.859588958490283e-05,
+ "loss": 0.4564,
+ "step": 57150
+ },
+ {
+ "epoch": 0.2951176601090697,
+ "grad_norm": 19690.37109375,
+ "learning_rate": 8.857265625464113e-05,
+ "loss": 0.4576,
+ "step": 57200
+ },
+ {
+ "epoch": 0.29537563009168255,
+ "grad_norm": 21793.189453125,
+ "learning_rate": 8.854940233504245e-05,
+ "loss": 0.4616,
+ "step": 57250
+ },
+ {
+ "epoch": 0.29563360007429534,
+ "grad_norm": 21543.033203125,
+ "learning_rate": 8.852612783851926e-05,
+ "loss": 0.4559,
+ "step": 57300
+ },
+ {
+ "epoch": 0.2958915700569082,
+ "grad_norm": 21455.56640625,
+ "learning_rate": 8.850283277749504e-05,
+ "loss": 0.4583,
+ "step": 57350
+ },
+ {
+ "epoch": 0.296149540039521,
+ "grad_norm": 21236.935546875,
+ "learning_rate": 8.847951716440426e-05,
+ "loss": 0.46,
+ "step": 57400
+ },
+ {
+ "epoch": 0.29640751002213384,
+ "grad_norm": 22411.130859375,
+ "learning_rate": 8.845618101169232e-05,
+ "loss": 0.4563,
+ "step": 57450
+ },
+ {
+ "epoch": 0.29666548000474663,
+ "grad_norm": 19269.26171875,
+ "learning_rate": 8.843282433181561e-05,
+ "loss": 0.4634,
+ "step": 57500
+ },
+ {
+ "epoch": 0.2969234499873595,
+ "grad_norm": 22179.669921875,
+ "learning_rate": 8.840944713724149e-05,
+ "loss": 0.4582,
+ "step": 57550
+ },
+ {
+ "epoch": 0.29718141996997227,
+ "grad_norm": 19867.076171875,
+ "learning_rate": 8.838604944044825e-05,
+ "loss": 0.4591,
+ "step": 57600
+ },
+ {
+ "epoch": 0.2974393899525851,
+ "grad_norm": 19806.09375,
+ "learning_rate": 8.836263125392511e-05,
+ "loss": 0.4571,
+ "step": 57650
+ },
+ {
+ "epoch": 0.2976973599351979,
+ "grad_norm": 21762.22265625,
+ "learning_rate": 8.833919259017225e-05,
+ "loss": 0.4526,
+ "step": 57700
+ },
+ {
+ "epoch": 0.29795532991781076,
+ "grad_norm": 21031.263671875,
+ "learning_rate": 8.83157334617008e-05,
+ "loss": 0.4577,
+ "step": 57750
+ },
+ {
+ "epoch": 0.2982132999004236,
+ "grad_norm": 22886.556640625,
+ "learning_rate": 8.829225388103276e-05,
+ "loss": 0.4553,
+ "step": 57800
+ },
+ {
+ "epoch": 0.2984712698830364,
+ "grad_norm": 19710.173828125,
+ "learning_rate": 8.826875386070108e-05,
+ "loss": 0.4556,
+ "step": 57850
+ },
+ {
+ "epoch": 0.29872923986564925,
+ "grad_norm": 20607.244140625,
+ "learning_rate": 8.824523341324963e-05,
+ "loss": 0.458,
+ "step": 57900
+ },
+ {
+ "epoch": 0.29898720984826205,
+ "grad_norm": 20672.05859375,
+ "learning_rate": 8.822169255123317e-05,
+ "loss": 0.4531,
+ "step": 57950
+ },
+ {
+ "epoch": 0.2992451798308749,
+ "grad_norm": 21375.76953125,
+ "learning_rate": 8.819813128721732e-05,
+ "loss": 0.4602,
+ "step": 58000
+ },
+ {
+ "epoch": 0.2995031498134877,
+ "grad_norm": 20848.328125,
+ "learning_rate": 8.817454963377865e-05,
+ "loss": 0.4557,
+ "step": 58050
+ },
+ {
+ "epoch": 0.29976111979610054,
+ "grad_norm": 20778.619140625,
+ "learning_rate": 8.81509476035046e-05,
+ "loss": 0.4588,
+ "step": 58100
+ },
+ {
+ "epoch": 0.30001908977871333,
+ "grad_norm": 19791.296875,
+ "learning_rate": 8.812732520899347e-05,
+ "loss": 0.4609,
+ "step": 58150
+ },
+ {
+ "epoch": 0.3002770597613262,
+ "grad_norm": 21814.482421875,
+ "learning_rate": 8.810368246285445e-05,
+ "loss": 0.4597,
+ "step": 58200
+ },
+ {
+ "epoch": 0.300535029743939,
+ "grad_norm": 22417.65625,
+ "learning_rate": 8.808001937770755e-05,
+ "loss": 0.461,
+ "step": 58250
+ },
+ {
+ "epoch": 0.3007929997265518,
+ "grad_norm": 21347.53515625,
+ "learning_rate": 8.80563359661837e-05,
+ "loss": 0.4523,
+ "step": 58300
+ },
+ {
+ "epoch": 0.3010509697091646,
+ "grad_norm": 21612.689453125,
+ "learning_rate": 8.803263224092461e-05,
+ "loss": 0.4588,
+ "step": 58350
+ },
+ {
+ "epoch": 0.30130893969177747,
+ "grad_norm": 19139.7109375,
+ "learning_rate": 8.80089082145829e-05,
+ "loss": 0.4576,
+ "step": 58400
+ },
+ {
+ "epoch": 0.3015669096743903,
+ "grad_norm": 21629.78125,
+ "learning_rate": 8.798516389982197e-05,
+ "loss": 0.4514,
+ "step": 58450
+ },
+ {
+ "epoch": 0.3018248796570031,
+ "grad_norm": 20307.630859375,
+ "learning_rate": 8.79613993093161e-05,
+ "loss": 0.4606,
+ "step": 58500
+ },
+ {
+ "epoch": 0.30208284963961596,
+ "grad_norm": 17832.3359375,
+ "learning_rate": 8.793761445575037e-05,
+ "loss": 0.4654,
+ "step": 58550
+ },
+ {
+ "epoch": 0.30234081962222875,
+ "grad_norm": 19975.20703125,
+ "learning_rate": 8.791380935182065e-05,
+ "loss": 0.4519,
+ "step": 58600
+ },
+ {
+ "epoch": 0.3025987896048416,
+ "grad_norm": 23387.681640625,
+ "learning_rate": 8.788998401023365e-05,
+ "loss": 0.4576,
+ "step": 58650
+ },
+ {
+ "epoch": 0.3028567595874544,
+ "grad_norm": 18704.669921875,
+ "learning_rate": 8.78661384437069e-05,
+ "loss": 0.4634,
+ "step": 58700
+ },
+ {
+ "epoch": 0.30311472957006724,
+ "grad_norm": 21739.806640625,
+ "learning_rate": 8.784227266496868e-05,
+ "loss": 0.4471,
+ "step": 58750
+ },
+ {
+ "epoch": 0.30337269955268004,
+ "grad_norm": 22190.74609375,
+ "learning_rate": 8.781838668675806e-05,
+ "loss": 0.4508,
+ "step": 58800
+ },
+ {
+ "epoch": 0.3036306695352929,
+ "grad_norm": 19186.9609375,
+ "learning_rate": 8.779448052182495e-05,
+ "loss": 0.4575,
+ "step": 58850
+ },
+ {
+ "epoch": 0.3038886395179057,
+ "grad_norm": 21925.8984375,
+ "learning_rate": 8.777055418293e-05,
+ "loss": 0.4614,
+ "step": 58900
+ },
+ {
+ "epoch": 0.3041466095005185,
+ "grad_norm": 21280.16796875,
+ "learning_rate": 8.774660768284459e-05,
+ "loss": 0.4621,
+ "step": 58950
+ },
+ {
+ "epoch": 0.3044045794831313,
+ "grad_norm": 19872.3828125,
+ "learning_rate": 8.772264103435094e-05,
+ "loss": 0.4617,
+ "step": 59000
+ },
+ {
+ "epoch": 0.30466254946574417,
+ "grad_norm": 17518.58984375,
+ "learning_rate": 8.769865425024195e-05,
+ "loss": 0.4548,
+ "step": 59050
+ },
+ {
+ "epoch": 0.30492051944835696,
+ "grad_norm": 25605.537109375,
+ "learning_rate": 8.767464734332131e-05,
+ "loss": 0.4532,
+ "step": 59100
+ },
+ {
+ "epoch": 0.3051784894309698,
+ "grad_norm": 20151.53515625,
+ "learning_rate": 8.765062032640346e-05,
+ "loss": 0.4558,
+ "step": 59150
+ },
+ {
+ "epoch": 0.30543645941358266,
+ "grad_norm": 19346.048828125,
+ "learning_rate": 8.762657321231353e-05,
+ "loss": 0.4624,
+ "step": 59200
+ },
+ {
+ "epoch": 0.30569442939619546,
+ "grad_norm": 21447.115234375,
+ "learning_rate": 8.760250601388741e-05,
+ "loss": 0.4632,
+ "step": 59250
+ },
+ {
+ "epoch": 0.3059523993788083,
+ "grad_norm": 19053.896484375,
+ "learning_rate": 8.757841874397172e-05,
+ "loss": 0.454,
+ "step": 59300
+ },
+ {
+ "epoch": 0.3062103693614211,
+ "grad_norm": 20928.8515625,
+ "learning_rate": 8.755431141542376e-05,
+ "loss": 0.4509,
+ "step": 59350
+ },
+ {
+ "epoch": 0.30646833934403395,
+ "grad_norm": 20900.40234375,
+ "learning_rate": 8.753018404111157e-05,
+ "loss": 0.4523,
+ "step": 59400
+ },
+ {
+ "epoch": 0.30672630932664674,
+ "grad_norm": 19776.572265625,
+ "learning_rate": 8.750603663391385e-05,
+ "loss": 0.458,
+ "step": 59450
+ },
+ {
+ "epoch": 0.3069842793092596,
+ "grad_norm": 21503.505859375,
+ "learning_rate": 8.748186920672005e-05,
+ "loss": 0.4496,
+ "step": 59500
+ },
+ {
+ "epoch": 0.3072422492918724,
+ "grad_norm": 20588.5078125,
+ "learning_rate": 8.745768177243027e-05,
+ "loss": 0.4578,
+ "step": 59550
+ },
+ {
+ "epoch": 0.30750021927448523,
+ "grad_norm": 20516.150390625,
+ "learning_rate": 8.743347434395528e-05,
+ "loss": 0.46,
+ "step": 59600
+ },
+ {
+ "epoch": 0.307758189257098,
+ "grad_norm": 20487.498046875,
+ "learning_rate": 8.740924693421655e-05,
+ "loss": 0.4574,
+ "step": 59650
+ },
+ {
+ "epoch": 0.3080161592397109,
+ "grad_norm": 21070.3671875,
+ "learning_rate": 8.738499955614619e-05,
+ "loss": 0.4564,
+ "step": 59700
+ },
+ {
+ "epoch": 0.30827412922232367,
+ "grad_norm": 19067.427734375,
+ "learning_rate": 8.736073222268697e-05,
+ "loss": 0.4523,
+ "step": 59750
+ },
+ {
+ "epoch": 0.3085320992049365,
+ "grad_norm": 22084.68359375,
+ "learning_rate": 8.733644494679236e-05,
+ "loss": 0.4558,
+ "step": 59800
+ },
+ {
+ "epoch": 0.3087900691875493,
+ "grad_norm": 22324.9140625,
+ "learning_rate": 8.731213774142639e-05,
+ "loss": 0.4585,
+ "step": 59850
+ },
+ {
+ "epoch": 0.30904803917016216,
+ "grad_norm": 19219.47265625,
+ "learning_rate": 8.728781061956383e-05,
+ "loss": 0.4571,
+ "step": 59900
+ },
+ {
+ "epoch": 0.309306009152775,
+ "grad_norm": 20598.125,
+ "learning_rate": 8.726346359418998e-05,
+ "loss": 0.4581,
+ "step": 59950
+ },
+ {
+ "epoch": 0.3095639791353878,
+ "grad_norm": 22155.720703125,
+ "learning_rate": 8.723909667830082e-05,
+ "loss": 0.4578,
+ "step": 60000
+ },
+ {
+ "epoch": 0.3095639791353878,
+ "eval_loss": 0.44494956731796265,
+ "eval_runtime": 3261.5111,
+ "eval_samples_per_second": 950.823,
+ "eval_steps_per_second": 1.857,
+ "step": 60000
+ },
+ {
+ "epoch": 0.30982194911800065,
+ "grad_norm": 22012.822265625,
+ "learning_rate": 8.721470988490297e-05,
+ "loss": 0.4533,
+ "step": 60050
+ },
+ {
+ "epoch": 0.31007991910061344,
+ "grad_norm": 20934.453125,
+ "learning_rate": 8.719030322701358e-05,
+ "loss": 0.4538,
+ "step": 60100
+ },
+ {
+ "epoch": 0.3103378890832263,
+ "grad_norm": 20173.20703125,
+ "learning_rate": 8.716587671766049e-05,
+ "loss": 0.4559,
+ "step": 60150
+ },
+ {
+ "epoch": 0.3105958590658391,
+ "grad_norm": 19343.833984375,
+ "learning_rate": 8.714143036988208e-05,
+ "loss": 0.4579,
+ "step": 60200
+ },
+ {
+ "epoch": 0.31085382904845194,
+ "grad_norm": 20720.435546875,
+ "learning_rate": 8.711696419672734e-05,
+ "loss": 0.4529,
+ "step": 60250
+ },
+ {
+ "epoch": 0.31111179903106473,
+ "grad_norm": 22050.85546875,
+ "learning_rate": 8.709247821125583e-05,
+ "loss": 0.4505,
+ "step": 60300
+ },
+ {
+ "epoch": 0.3113697690136776,
+ "grad_norm": 22470.55078125,
+ "learning_rate": 8.706797242653773e-05,
+ "loss": 0.4616,
+ "step": 60350
+ },
+ {
+ "epoch": 0.31162773899629037,
+ "grad_norm": 21057.978515625,
+ "learning_rate": 8.70434468556537e-05,
+ "loss": 0.4568,
+ "step": 60400
+ },
+ {
+ "epoch": 0.3118857089789032,
+ "grad_norm": 21035.34375,
+ "learning_rate": 8.701890151169507e-05,
+ "loss": 0.4551,
+ "step": 60450
+ },
+ {
+ "epoch": 0.312143678961516,
+ "grad_norm": 20412.056640625,
+ "learning_rate": 8.699433640776363e-05,
+ "loss": 0.4521,
+ "step": 60500
+ },
+ {
+ "epoch": 0.31240164894412886,
+ "grad_norm": 19888.26953125,
+ "learning_rate": 8.696975155697175e-05,
+ "loss": 0.4565,
+ "step": 60550
+ },
+ {
+ "epoch": 0.3126596189267417,
+ "grad_norm": 22491.900390625,
+ "learning_rate": 8.694514697244238e-05,
+ "loss": 0.4578,
+ "step": 60600
+ },
+ {
+ "epoch": 0.3129175889093545,
+ "grad_norm": 20026.357421875,
+ "learning_rate": 8.692052266730897e-05,
+ "loss": 0.4554,
+ "step": 60650
+ },
+ {
+ "epoch": 0.31317555889196735,
+ "grad_norm": 22979.109375,
+ "learning_rate": 8.689587865471547e-05,
+ "loss": 0.461,
+ "step": 60700
+ },
+ {
+ "epoch": 0.31343352887458015,
+ "grad_norm": 21558.291015625,
+ "learning_rate": 8.68712149478164e-05,
+ "loss": 0.4546,
+ "step": 60750
+ },
+ {
+ "epoch": 0.313691498857193,
+ "grad_norm": 22115.384765625,
+ "learning_rate": 8.684653155977676e-05,
+ "loss": 0.4518,
+ "step": 60800
+ },
+ {
+ "epoch": 0.3139494688398058,
+ "grad_norm": 21422.41015625,
+ "learning_rate": 8.682182850377205e-05,
+ "loss": 0.4602,
+ "step": 60850
+ },
+ {
+ "epoch": 0.31420743882241864,
+ "grad_norm": 21101.02734375,
+ "learning_rate": 8.679710579298832e-05,
+ "loss": 0.4579,
+ "step": 60900
+ },
+ {
+ "epoch": 0.31446540880503143,
+ "grad_norm": 18844.361328125,
+ "learning_rate": 8.677236344062203e-05,
+ "loss": 0.4569,
+ "step": 60950
+ },
+ {
+ "epoch": 0.3147233787876443,
+ "grad_norm": 20492.796875,
+ "learning_rate": 8.67476014598802e-05,
+ "loss": 0.4542,
+ "step": 61000
+ },
+ {
+ "epoch": 0.3149813487702571,
+ "grad_norm": 28102.55078125,
+ "learning_rate": 8.67228198639803e-05,
+ "loss": 0.4516,
+ "step": 61050
+ },
+ {
+ "epoch": 0.3152393187528699,
+ "grad_norm": 20697.494140625,
+ "learning_rate": 8.669801866615024e-05,
+ "loss": 0.4551,
+ "step": 61100
+ },
+ {
+ "epoch": 0.3154972887354827,
+ "grad_norm": 20726.90625,
+ "learning_rate": 8.667319787962842e-05,
+ "loss": 0.4576,
+ "step": 61150
+ },
+ {
+ "epoch": 0.31575525871809557,
+ "grad_norm": 20007.04296875,
+ "learning_rate": 8.664835751766371e-05,
+ "loss": 0.4544,
+ "step": 61200
+ },
+ {
+ "epoch": 0.31601322870070836,
+ "grad_norm": 23061.224609375,
+ "learning_rate": 8.662349759351542e-05,
+ "loss": 0.458,
+ "step": 61250
+ },
+ {
+ "epoch": 0.3162711986833212,
+ "grad_norm": 19895.3125,
+ "learning_rate": 8.65986181204533e-05,
+ "loss": 0.4555,
+ "step": 61300
+ },
+ {
+ "epoch": 0.31652916866593406,
+ "grad_norm": 22702.5234375,
+ "learning_rate": 8.65737191117575e-05,
+ "loss": 0.4586,
+ "step": 61350
+ },
+ {
+ "epoch": 0.31678713864854685,
+ "grad_norm": 20045.404296875,
+ "learning_rate": 8.654880058071866e-05,
+ "loss": 0.4583,
+ "step": 61400
+ },
+ {
+ "epoch": 0.3170451086311597,
+ "grad_norm": 21180.455078125,
+ "learning_rate": 8.652386254063778e-05,
+ "loss": 0.4594,
+ "step": 61450
+ },
+ {
+ "epoch": 0.3173030786137725,
+ "grad_norm": 19104.767578125,
+ "learning_rate": 8.649890500482633e-05,
+ "loss": 0.4532,
+ "step": 61500
+ },
+ {
+ "epoch": 0.31756104859638534,
+ "grad_norm": 23137.869140625,
+ "learning_rate": 8.647392798660613e-05,
+ "loss": 0.4535,
+ "step": 61550
+ },
+ {
+ "epoch": 0.31781901857899814,
+ "grad_norm": 21784.001953125,
+ "learning_rate": 8.644893149930949e-05,
+ "loss": 0.4518,
+ "step": 61600
+ },
+ {
+ "epoch": 0.318076988561611,
+ "grad_norm": 20489.796875,
+ "learning_rate": 8.642391555627897e-05,
+ "loss": 0.4572,
+ "step": 61650
+ },
+ {
+ "epoch": 0.3183349585442238,
+ "grad_norm": 21743.728515625,
+ "learning_rate": 8.639888017086764e-05,
+ "loss": 0.4601,
+ "step": 61700
+ },
+ {
+ "epoch": 0.3185929285268366,
+ "grad_norm": 21714.6171875,
+ "learning_rate": 8.63738253564389e-05,
+ "loss": 0.4597,
+ "step": 61750
+ },
+ {
+ "epoch": 0.3188508985094494,
+ "grad_norm": 19896.208984375,
+ "learning_rate": 8.634875112636653e-05,
+ "loss": 0.4532,
+ "step": 61800
+ },
+ {
+ "epoch": 0.31910886849206227,
+ "grad_norm": 22215.173828125,
+ "learning_rate": 8.632365749403465e-05,
+ "loss": 0.4532,
+ "step": 61850
+ },
+ {
+ "epoch": 0.31936683847467506,
+ "grad_norm": 22466.958984375,
+ "learning_rate": 8.629854447283778e-05,
+ "loss": 0.4539,
+ "step": 61900
+ },
+ {
+ "epoch": 0.3196248084572879,
+ "grad_norm": 21345.197265625,
+ "learning_rate": 8.627341207618073e-05,
+ "loss": 0.4551,
+ "step": 61950
+ },
+ {
+ "epoch": 0.3198827784399007,
+ "grad_norm": 20988.8203125,
+ "learning_rate": 8.624826031747872e-05,
+ "loss": 0.4593,
+ "step": 62000
+ },
+ {
+ "epoch": 0.32014074842251355,
+ "grad_norm": 23295.70703125,
+ "learning_rate": 8.622308921015726e-05,
+ "loss": 0.4547,
+ "step": 62050
+ },
+ {
+ "epoch": 0.3203987184051264,
+ "grad_norm": 22620.431640625,
+ "learning_rate": 8.619789876765221e-05,
+ "loss": 0.4601,
+ "step": 62100
+ },
+ {
+ "epoch": 0.3206566883877392,
+ "grad_norm": 21914.44140625,
+ "learning_rate": 8.61726890034097e-05,
+ "loss": 0.4474,
+ "step": 62150
+ },
+ {
+ "epoch": 0.32091465837035205,
+ "grad_norm": 20521.265625,
+ "learning_rate": 8.614745993088626e-05,
+ "loss": 0.4565,
+ "step": 62200
+ },
+ {
+ "epoch": 0.32117262835296484,
+ "grad_norm": 22810.072265625,
+ "learning_rate": 8.612221156354868e-05,
+ "loss": 0.453,
+ "step": 62250
+ },
+ {
+ "epoch": 0.3214305983355777,
+ "grad_norm": 20862.349609375,
+ "learning_rate": 8.609694391487402e-05,
+ "loss": 0.4543,
+ "step": 62300
+ },
+ {
+ "epoch": 0.3216885683181905,
+ "grad_norm": 22115.298828125,
+ "learning_rate": 8.607165699834967e-05,
+ "loss": 0.453,
+ "step": 62350
+ },
+ {
+ "epoch": 0.32194653830080333,
+ "grad_norm": 22504.859375,
+ "learning_rate": 8.60463508274733e-05,
+ "loss": 0.4552,
+ "step": 62400
+ },
+ {
+ "epoch": 0.3222045082834161,
+ "grad_norm": 21758.9453125,
+ "learning_rate": 8.602102541575286e-05,
+ "loss": 0.4526,
+ "step": 62450
+ },
+ {
+ "epoch": 0.322462478266029,
+ "grad_norm": 20388.23828125,
+ "learning_rate": 8.599568077670654e-05,
+ "loss": 0.4522,
+ "step": 62500
+ },
+ {
+ "epoch": 0.32272044824864177,
+ "grad_norm": 22393.857421875,
+ "learning_rate": 8.597031692386286e-05,
+ "loss": 0.4457,
+ "step": 62550
+ },
+ {
+ "epoch": 0.3229784182312546,
+ "grad_norm": 22233.978515625,
+ "learning_rate": 8.594493387076052e-05,
+ "loss": 0.449,
+ "step": 62600
+ },
+ {
+ "epoch": 0.3232363882138674,
+ "grad_norm": 19831.12109375,
+ "learning_rate": 8.591953163094852e-05,
+ "loss": 0.4556,
+ "step": 62650
+ },
+ {
+ "epoch": 0.32349435819648026,
+ "grad_norm": 19109.783203125,
+ "learning_rate": 8.589411021798608e-05,
+ "loss": 0.4552,
+ "step": 62700
+ },
+ {
+ "epoch": 0.3237523281790931,
+ "grad_norm": 23053.642578125,
+ "learning_rate": 8.586866964544265e-05,
+ "loss": 0.4552,
+ "step": 62750
+ },
+ {
+ "epoch": 0.3240102981617059,
+ "grad_norm": 17938.240234375,
+ "learning_rate": 8.584320992689791e-05,
+ "loss": 0.4512,
+ "step": 62800
+ },
+ {
+ "epoch": 0.32426826814431875,
+ "grad_norm": 19569.431640625,
+ "learning_rate": 8.581773107594179e-05,
+ "loss": 0.4557,
+ "step": 62850
+ },
+ {
+ "epoch": 0.32452623812693154,
+ "grad_norm": 19247.82421875,
+ "learning_rate": 8.579223310617439e-05,
+ "loss": 0.4599,
+ "step": 62900
+ },
+ {
+ "epoch": 0.3247842081095444,
+ "grad_norm": 21565.8671875,
+ "learning_rate": 8.576671603120603e-05,
+ "loss": 0.4573,
+ "step": 62950
+ },
+ {
+ "epoch": 0.3250421780921572,
+ "grad_norm": 19029.005859375,
+ "learning_rate": 8.574117986465723e-05,
+ "loss": 0.455,
+ "step": 63000
+ },
+ {
+ "epoch": 0.32530014807477003,
+ "grad_norm": 21574.626953125,
+ "learning_rate": 8.57156246201587e-05,
+ "loss": 0.4512,
+ "step": 63050
+ },
+ {
+ "epoch": 0.32555811805738283,
+ "grad_norm": 21181.8203125,
+ "learning_rate": 8.569005031135136e-05,
+ "loss": 0.4513,
+ "step": 63100
+ },
+ {
+ "epoch": 0.3258160880399957,
+ "grad_norm": 22689.93359375,
+ "learning_rate": 8.566445695188624e-05,
+ "loss": 0.4515,
+ "step": 63150
+ },
+ {
+ "epoch": 0.32607405802260847,
+ "grad_norm": 22001.9921875,
+ "learning_rate": 8.563884455542461e-05,
+ "loss": 0.4459,
+ "step": 63200
+ },
+ {
+ "epoch": 0.3263320280052213,
+ "grad_norm": 20342.96875,
+ "learning_rate": 8.561321313563786e-05,
+ "loss": 0.4526,
+ "step": 63250
+ },
+ {
+ "epoch": 0.3265899979878341,
+ "grad_norm": 20673.75390625,
+ "learning_rate": 8.558756270620756e-05,
+ "loss": 0.4581,
+ "step": 63300
+ },
+ {
+ "epoch": 0.32684796797044696,
+ "grad_norm": 23113.490234375,
+ "learning_rate": 8.556189328082538e-05,
+ "loss": 0.4525,
+ "step": 63350
+ },
+ {
+ "epoch": 0.32710593795305976,
+ "grad_norm": 21878.384765625,
+ "learning_rate": 8.55362048731932e-05,
+ "loss": 0.4536,
+ "step": 63400
+ },
+ {
+ "epoch": 0.3273639079356726,
+ "grad_norm": 22787.79296875,
+ "learning_rate": 8.551049749702297e-05,
+ "loss": 0.4586,
+ "step": 63450
+ },
+ {
+ "epoch": 0.32762187791828545,
+ "grad_norm": 20422.0625,
+ "learning_rate": 8.548477116603679e-05,
+ "loss": 0.4496,
+ "step": 63500
+ },
+ {
+ "epoch": 0.32787984790089825,
+ "grad_norm": 21936.8828125,
+ "learning_rate": 8.54590258939669e-05,
+ "loss": 0.4509,
+ "step": 63550
+ },
+ {
+ "epoch": 0.3281378178835111,
+ "grad_norm": 21049.275390625,
+ "learning_rate": 8.54332616945556e-05,
+ "loss": 0.4514,
+ "step": 63600
+ },
+ {
+ "epoch": 0.3283957878661239,
+ "grad_norm": 22976.1015625,
+ "learning_rate": 8.540747858155533e-05,
+ "loss": 0.4611,
+ "step": 63650
+ },
+ {
+ "epoch": 0.32865375784873674,
+ "grad_norm": 21968.18359375,
+ "learning_rate": 8.538167656872861e-05,
+ "loss": 0.4557,
+ "step": 63700
+ },
+ {
+ "epoch": 0.32891172783134953,
+ "grad_norm": 22231.755859375,
+ "learning_rate": 8.53558556698481e-05,
+ "loss": 0.4556,
+ "step": 63750
+ },
+ {
+ "epoch": 0.3291696978139624,
+ "grad_norm": 21183.978515625,
+ "learning_rate": 8.533001589869643e-05,
+ "loss": 0.4479,
+ "step": 63800
+ },
+ {
+ "epoch": 0.3294276677965752,
+ "grad_norm": 23931.5234375,
+ "learning_rate": 8.530415726906642e-05,
+ "loss": 0.4533,
+ "step": 63850
+ },
+ {
+ "epoch": 0.329685637779188,
+ "grad_norm": 21073.62890625,
+ "learning_rate": 8.527827979476087e-05,
+ "loss": 0.4577,
+ "step": 63900
+ },
+ {
+ "epoch": 0.3299436077618008,
+ "grad_norm": 19957.09375,
+ "learning_rate": 8.525238348959268e-05,
+ "loss": 0.4486,
+ "step": 63950
+ },
+ {
+ "epoch": 0.33020157774441367,
+ "grad_norm": 18999.962890625,
+ "learning_rate": 8.522646836738482e-05,
+ "loss": 0.4525,
+ "step": 64000
+ },
+ {
+ "epoch": 0.33045954772702646,
+ "grad_norm": 24102.1640625,
+ "learning_rate": 8.520053444197026e-05,
+ "loss": 0.4545,
+ "step": 64050
+ },
+ {
+ "epoch": 0.3307175177096393,
+ "grad_norm": 20205.65234375,
+ "learning_rate": 8.517458172719203e-05,
+ "loss": 0.4539,
+ "step": 64100
+ },
+ {
+ "epoch": 0.33097548769225216,
+ "grad_norm": 24099.8203125,
+ "learning_rate": 8.514861023690321e-05,
+ "loss": 0.4465,
+ "step": 64150
+ },
+ {
+ "epoch": 0.33123345767486495,
+ "grad_norm": 19802.203125,
+ "learning_rate": 8.512261998496685e-05,
+ "loss": 0.4546,
+ "step": 64200
+ },
+ {
+ "epoch": 0.3314914276574778,
+ "grad_norm": 23137.609375,
+ "learning_rate": 8.509661098525603e-05,
+ "loss": 0.4539,
+ "step": 64250
+ },
+ {
+ "epoch": 0.3317493976400906,
+ "grad_norm": 23578.609375,
+ "learning_rate": 8.507058325165391e-05,
+ "loss": 0.4513,
+ "step": 64300
+ },
+ {
+ "epoch": 0.33200736762270344,
+ "grad_norm": 19172.0859375,
+ "learning_rate": 8.504453679805353e-05,
+ "loss": 0.456,
+ "step": 64350
+ },
+ {
+ "epoch": 0.33226533760531624,
+ "grad_norm": 19165.775390625,
+ "learning_rate": 8.5018471638358e-05,
+ "loss": 0.4578,
+ "step": 64400
+ },
+ {
+ "epoch": 0.3325233075879291,
+ "grad_norm": 18070.72265625,
+ "learning_rate": 8.49923877864804e-05,
+ "loss": 0.4608,
+ "step": 64450
+ },
+ {
+ "epoch": 0.3327812775705419,
+ "grad_norm": 20918.525390625,
+ "learning_rate": 8.49662852563438e-05,
+ "loss": 0.4526,
+ "step": 64500
+ },
+ {
+ "epoch": 0.3330392475531547,
+ "grad_norm": 21165.05078125,
+ "learning_rate": 8.494016406188121e-05,
+ "loss": 0.4503,
+ "step": 64550
+ },
+ {
+ "epoch": 0.3332972175357675,
+ "grad_norm": 19273.013671875,
+ "learning_rate": 8.491402421703562e-05,
+ "loss": 0.4572,
+ "step": 64600
+ },
+ {
+ "epoch": 0.33355518751838037,
+ "grad_norm": 21221.681640625,
+ "learning_rate": 8.488786573575998e-05,
+ "loss": 0.456,
+ "step": 64650
+ },
+ {
+ "epoch": 0.33381315750099316,
+ "grad_norm": 19485.8125,
+ "learning_rate": 8.486168863201716e-05,
+ "loss": 0.4423,
+ "step": 64700
+ },
+ {
+ "epoch": 0.334071127483606,
+ "grad_norm": 23241.580078125,
+ "learning_rate": 8.483549291978001e-05,
+ "loss": 0.4531,
+ "step": 64750
+ },
+ {
+ "epoch": 0.3343290974662188,
+ "grad_norm": 21281.111328125,
+ "learning_rate": 8.48092786130313e-05,
+ "loss": 0.452,
+ "step": 64800
+ },
+ {
+ "epoch": 0.33458706744883165,
+ "grad_norm": 21610.2578125,
+ "learning_rate": 8.47830457257637e-05,
+ "loss": 0.4488,
+ "step": 64850
+ },
+ {
+ "epoch": 0.3348450374314445,
+ "grad_norm": 19343.466796875,
+ "learning_rate": 8.475679427197982e-05,
+ "loss": 0.4514,
+ "step": 64900
+ },
+ {
+ "epoch": 0.3351030074140573,
+ "grad_norm": 19489.1875,
+ "learning_rate": 8.473052426569219e-05,
+ "loss": 0.447,
+ "step": 64950
+ },
+ {
+ "epoch": 0.33536097739667015,
+ "grad_norm": 24805.84765625,
+ "learning_rate": 8.470423572092323e-05,
+ "loss": 0.4594,
+ "step": 65000
+ },
+ {
+ "epoch": 0.33536097739667015,
+ "eval_loss": 0.440469890832901,
+ "eval_runtime": 3318.76,
+ "eval_samples_per_second": 934.421,
+ "eval_steps_per_second": 1.825,
+ "step": 65000
+ },
+ {
+ "epoch": 0.33561894737928294,
+ "grad_norm": 22912.732421875,
+ "learning_rate": 8.467792865170525e-05,
+ "loss": 0.4435,
+ "step": 65050
+ },
+ {
+ "epoch": 0.3358769173618958,
+ "grad_norm": 19958.994140625,
+ "learning_rate": 8.465160307208045e-05,
+ "loss": 0.4588,
+ "step": 65100
+ },
+ {
+ "epoch": 0.3361348873445086,
+ "grad_norm": 20914.193359375,
+ "learning_rate": 8.462525899610092e-05,
+ "loss": 0.4497,
+ "step": 65150
+ },
+ {
+ "epoch": 0.33639285732712143,
+ "grad_norm": 20505.814453125,
+ "learning_rate": 8.459889643782861e-05,
+ "loss": 0.4569,
+ "step": 65200
+ },
+ {
+ "epoch": 0.3366508273097342,
+ "grad_norm": 19486.068359375,
+ "learning_rate": 8.457251541133535e-05,
+ "loss": 0.4505,
+ "step": 65250
+ },
+ {
+ "epoch": 0.3369087972923471,
+ "grad_norm": 21967.84765625,
+ "learning_rate": 8.454611593070284e-05,
+ "loss": 0.4556,
+ "step": 65300
+ },
+ {
+ "epoch": 0.33716676727495987,
+ "grad_norm": 21949.767578125,
+ "learning_rate": 8.451969801002258e-05,
+ "loss": 0.4491,
+ "step": 65350
+ },
+ {
+ "epoch": 0.3374247372575727,
+ "grad_norm": 19765.14453125,
+ "learning_rate": 8.449326166339595e-05,
+ "loss": 0.4507,
+ "step": 65400
+ },
+ {
+ "epoch": 0.3376827072401855,
+ "grad_norm": 21396.982421875,
+ "learning_rate": 8.446680690493417e-05,
+ "loss": 0.4548,
+ "step": 65450
+ },
+ {
+ "epoch": 0.33794067722279836,
+ "grad_norm": 22511.8359375,
+ "learning_rate": 8.444033374875828e-05,
+ "loss": 0.454,
+ "step": 65500
+ },
+ {
+ "epoch": 0.33819864720541115,
+ "grad_norm": 21264.076171875,
+ "learning_rate": 8.441384220899912e-05,
+ "loss": 0.4486,
+ "step": 65550
+ },
+ {
+ "epoch": 0.338456617188024,
+ "grad_norm": 20736.046875,
+ "learning_rate": 8.438733229979741e-05,
+ "loss": 0.4505,
+ "step": 65600
+ },
+ {
+ "epoch": 0.33871458717063685,
+ "grad_norm": 20183.8359375,
+ "learning_rate": 8.436080403530356e-05,
+ "loss": 0.4485,
+ "step": 65650
+ },
+ {
+ "epoch": 0.33897255715324964,
+ "grad_norm": 21947.3671875,
+ "learning_rate": 8.433425742967787e-05,
+ "loss": 0.4499,
+ "step": 65700
+ },
+ {
+ "epoch": 0.3392305271358625,
+ "grad_norm": 22621.236328125,
+ "learning_rate": 8.430769249709042e-05,
+ "loss": 0.4503,
+ "step": 65750
+ },
+ {
+ "epoch": 0.3394884971184753,
+ "grad_norm": 21537.947265625,
+ "learning_rate": 8.428110925172103e-05,
+ "loss": 0.4634,
+ "step": 65800
+ },
+ {
+ "epoch": 0.33974646710108813,
+ "grad_norm": 20869.759765625,
+ "learning_rate": 8.425450770775936e-05,
+ "loss": 0.4504,
+ "step": 65850
+ },
+ {
+ "epoch": 0.34000443708370093,
+ "grad_norm": 20865.12109375,
+ "learning_rate": 8.422788787940477e-05,
+ "loss": 0.4509,
+ "step": 65900
+ },
+ {
+ "epoch": 0.3402624070663138,
+ "grad_norm": 23897.974609375,
+ "learning_rate": 8.42012497808664e-05,
+ "loss": 0.4512,
+ "step": 65950
+ },
+ {
+ "epoch": 0.34052037704892657,
+ "grad_norm": 23978.56640625,
+ "learning_rate": 8.417459342636318e-05,
+ "loss": 0.4513,
+ "step": 66000
+ },
+ {
+ "epoch": 0.3407783470315394,
+ "grad_norm": 22806.99609375,
+ "learning_rate": 8.414791883012374e-05,
+ "loss": 0.4468,
+ "step": 66050
+ },
+ {
+ "epoch": 0.3410363170141522,
+ "grad_norm": 20348.841796875,
+ "learning_rate": 8.412122600638646e-05,
+ "loss": 0.4484,
+ "step": 66100
+ },
+ {
+ "epoch": 0.34129428699676506,
+ "grad_norm": 21868.353515625,
+ "learning_rate": 8.409451496939945e-05,
+ "loss": 0.4601,
+ "step": 66150
+ },
+ {
+ "epoch": 0.34155225697937786,
+ "grad_norm": 20312.36328125,
+ "learning_rate": 8.406778573342055e-05,
+ "loss": 0.4485,
+ "step": 66200
+ },
+ {
+ "epoch": 0.3418102269619907,
+ "grad_norm": 25603.419921875,
+ "learning_rate": 8.404103831271733e-05,
+ "loss": 0.4487,
+ "step": 66250
+ },
+ {
+ "epoch": 0.34206819694460355,
+ "grad_norm": 21330.416015625,
+ "learning_rate": 8.4014272721567e-05,
+ "loss": 0.449,
+ "step": 66300
+ },
+ {
+ "epoch": 0.34232616692721635,
+ "grad_norm": 20045.4453125,
+ "learning_rate": 8.398748897425656e-05,
+ "loss": 0.447,
+ "step": 66350
+ },
+ {
+ "epoch": 0.3425841369098292,
+ "grad_norm": 21575.642578125,
+ "learning_rate": 8.396068708508262e-05,
+ "loss": 0.4495,
+ "step": 66400
+ },
+ {
+ "epoch": 0.342842106892442,
+ "grad_norm": 20396.5390625,
+ "learning_rate": 8.393386706835154e-05,
+ "loss": 0.4478,
+ "step": 66450
+ },
+ {
+ "epoch": 0.34310007687505484,
+ "grad_norm": 20366.8046875,
+ "learning_rate": 8.390702893837929e-05,
+ "loss": 0.4531,
+ "step": 66500
+ },
+ {
+ "epoch": 0.34335804685766763,
+ "grad_norm": 23514.521484375,
+ "learning_rate": 8.388017270949158e-05,
+ "loss": 0.4496,
+ "step": 66550
+ },
+ {
+ "epoch": 0.3436160168402805,
+ "grad_norm": 23656.869140625,
+ "learning_rate": 8.385329839602372e-05,
+ "loss": 0.448,
+ "step": 66600
+ },
+ {
+ "epoch": 0.3438739868228933,
+ "grad_norm": 23712.216796875,
+ "learning_rate": 8.382640601232071e-05,
+ "loss": 0.4502,
+ "step": 66650
+ },
+ {
+ "epoch": 0.3441319568055061,
+ "grad_norm": 23220.240234375,
+ "learning_rate": 8.379949557273717e-05,
+ "loss": 0.4469,
+ "step": 66700
+ },
+ {
+ "epoch": 0.3443899267881189,
+ "grad_norm": 21469.244140625,
+ "learning_rate": 8.37725670916374e-05,
+ "loss": 0.4506,
+ "step": 66750
+ },
+ {
+ "epoch": 0.34464789677073177,
+ "grad_norm": 19195.431640625,
+ "learning_rate": 8.374562058339528e-05,
+ "loss": 0.4494,
+ "step": 66800
+ },
+ {
+ "epoch": 0.34490586675334456,
+ "grad_norm": 21464.130859375,
+ "learning_rate": 8.371865606239433e-05,
+ "loss": 0.4552,
+ "step": 66850
+ },
+ {
+ "epoch": 0.3451638367359574,
+ "grad_norm": 23449.76953125,
+ "learning_rate": 8.36916735430277e-05,
+ "loss": 0.4513,
+ "step": 66900
+ },
+ {
+ "epoch": 0.3454218067185702,
+ "grad_norm": 20593.39453125,
+ "learning_rate": 8.366467303969814e-05,
+ "loss": 0.447,
+ "step": 66950
+ },
+ {
+ "epoch": 0.34567977670118305,
+ "grad_norm": 21341.72265625,
+ "learning_rate": 8.3637654566818e-05,
+ "loss": 0.4448,
+ "step": 67000
+ },
+ {
+ "epoch": 0.3459377466837959,
+ "grad_norm": 20746.919921875,
+ "learning_rate": 8.361061813880919e-05,
+ "loss": 0.4511,
+ "step": 67050
+ },
+ {
+ "epoch": 0.3461957166664087,
+ "grad_norm": 19786.162109375,
+ "learning_rate": 8.358356377010325e-05,
+ "loss": 0.452,
+ "step": 67100
+ },
+ {
+ "epoch": 0.34645368664902154,
+ "grad_norm": 20875.25,
+ "learning_rate": 8.355649147514128e-05,
+ "loss": 0.4491,
+ "step": 67150
+ },
+ {
+ "epoch": 0.34671165663163434,
+ "grad_norm": 22833.728515625,
+ "learning_rate": 8.352940126837394e-05,
+ "loss": 0.4545,
+ "step": 67200
+ },
+ {
+ "epoch": 0.3469696266142472,
+ "grad_norm": 21289.896484375,
+ "learning_rate": 8.350229316426146e-05,
+ "loss": 0.4451,
+ "step": 67250
+ },
+ {
+ "epoch": 0.34722759659686,
+ "grad_norm": 23276.080078125,
+ "learning_rate": 8.347516717727363e-05,
+ "loss": 0.4468,
+ "step": 67300
+ },
+ {
+ "epoch": 0.3474855665794728,
+ "grad_norm": 22568.234375,
+ "learning_rate": 8.344802332188977e-05,
+ "loss": 0.4455,
+ "step": 67350
+ },
+ {
+ "epoch": 0.3477435365620856,
+ "grad_norm": 19527.234375,
+ "learning_rate": 8.342086161259874e-05,
+ "loss": 0.4511,
+ "step": 67400
+ },
+ {
+ "epoch": 0.34800150654469847,
+ "grad_norm": 21764.56640625,
+ "learning_rate": 8.339368206389895e-05,
+ "loss": 0.4481,
+ "step": 67450
+ },
+ {
+ "epoch": 0.34825947652731126,
+ "grad_norm": 21142.33984375,
+ "learning_rate": 8.336648469029829e-05,
+ "loss": 0.4539,
+ "step": 67500
+ },
+ {
+ "epoch": 0.3485174465099241,
+ "grad_norm": 21612.60546875,
+ "learning_rate": 8.333926950631421e-05,
+ "loss": 0.4497,
+ "step": 67550
+ },
+ {
+ "epoch": 0.3487754164925369,
+ "grad_norm": 20772.0390625,
+ "learning_rate": 8.331203652647364e-05,
+ "loss": 0.458,
+ "step": 67600
+ },
+ {
+ "epoch": 0.34903338647514975,
+ "grad_norm": 22197.166015625,
+ "learning_rate": 8.328478576531303e-05,
+ "loss": 0.4499,
+ "step": 67650
+ },
+ {
+ "epoch": 0.34929135645776255,
+ "grad_norm": 20853.865234375,
+ "learning_rate": 8.32575172373783e-05,
+ "loss": 0.4473,
+ "step": 67700
+ },
+ {
+ "epoch": 0.3495493264403754,
+ "grad_norm": 19692.892578125,
+ "learning_rate": 8.323023095722486e-05,
+ "loss": 0.4516,
+ "step": 67750
+ },
+ {
+ "epoch": 0.34980729642298825,
+ "grad_norm": 22032.115234375,
+ "learning_rate": 8.32029269394176e-05,
+ "loss": 0.4452,
+ "step": 67800
+ },
+ {
+ "epoch": 0.35006526640560104,
+ "grad_norm": 23928.783203125,
+ "learning_rate": 8.317560519853089e-05,
+ "loss": 0.4489,
+ "step": 67850
+ },
+ {
+ "epoch": 0.3503232363882139,
+ "grad_norm": 20832.560546875,
+ "learning_rate": 8.314826574914853e-05,
+ "loss": 0.4493,
+ "step": 67900
+ },
+ {
+ "epoch": 0.3505812063708267,
+ "grad_norm": 23453.634765625,
+ "learning_rate": 8.31209086058638e-05,
+ "loss": 0.4487,
+ "step": 67950
+ },
+ {
+ "epoch": 0.35083917635343953,
+ "grad_norm": 23585.826171875,
+ "learning_rate": 8.309353378327938e-05,
+ "loss": 0.4473,
+ "step": 68000
+ },
+ {
+ "epoch": 0.3510971463360523,
+ "grad_norm": 21680.953125,
+ "learning_rate": 8.306614129600745e-05,
+ "loss": 0.4494,
+ "step": 68050
+ },
+ {
+ "epoch": 0.3513551163186652,
+ "grad_norm": 19228.56640625,
+ "learning_rate": 8.303873115866958e-05,
+ "loss": 0.4483,
+ "step": 68100
+ },
+ {
+ "epoch": 0.35161308630127797,
+ "grad_norm": 22056.6328125,
+ "learning_rate": 8.301130338589679e-05,
+ "loss": 0.4485,
+ "step": 68150
+ },
+ {
+ "epoch": 0.3518710562838908,
+ "grad_norm": 22030.484375,
+ "learning_rate": 8.298385799232947e-05,
+ "loss": 0.4462,
+ "step": 68200
+ },
+ {
+ "epoch": 0.3521290262665036,
+ "grad_norm": 19658.33984375,
+ "learning_rate": 8.295639499261745e-05,
+ "loss": 0.4444,
+ "step": 68250
+ },
+ {
+ "epoch": 0.35238699624911646,
+ "grad_norm": 19667.8125,
+ "learning_rate": 8.292891440141997e-05,
+ "loss": 0.4482,
+ "step": 68300
+ },
+ {
+ "epoch": 0.35264496623172925,
+ "grad_norm": 20248.193359375,
+ "learning_rate": 8.290141623340558e-05,
+ "loss": 0.454,
+ "step": 68350
+ },
+ {
+ "epoch": 0.3529029362143421,
+ "grad_norm": 21358.89453125,
+ "learning_rate": 8.287390050325232e-05,
+ "loss": 0.4485,
+ "step": 68400
+ },
+ {
+ "epoch": 0.35316090619695495,
+ "grad_norm": 19209.328125,
+ "learning_rate": 8.284636722564754e-05,
+ "loss": 0.4505,
+ "step": 68450
+ },
+ {
+ "epoch": 0.35341887617956774,
+ "grad_norm": 21890.7109375,
+ "learning_rate": 8.281881641528795e-05,
+ "loss": 0.4531,
+ "step": 68500
+ },
+ {
+ "epoch": 0.3536768461621806,
+ "grad_norm": 20904.052734375,
+ "learning_rate": 8.279124808687967e-05,
+ "loss": 0.4494,
+ "step": 68550
+ },
+ {
+ "epoch": 0.3539348161447934,
+ "grad_norm": 22519.888671875,
+ "learning_rate": 8.276366225513812e-05,
+ "loss": 0.4422,
+ "step": 68600
+ },
+ {
+ "epoch": 0.35419278612740623,
+ "grad_norm": 20027.009765625,
+ "learning_rate": 8.27360589347881e-05,
+ "loss": 0.4484,
+ "step": 68650
+ },
+ {
+ "epoch": 0.354450756110019,
+ "grad_norm": 22069.64453125,
+ "learning_rate": 8.27084381405637e-05,
+ "loss": 0.443,
+ "step": 68700
+ },
+ {
+ "epoch": 0.3547087260926319,
+ "grad_norm": 23096.74609375,
+ "learning_rate": 8.26807998872084e-05,
+ "loss": 0.4437,
+ "step": 68750
+ },
+ {
+ "epoch": 0.35496669607524467,
+ "grad_norm": 19204.626953125,
+ "learning_rate": 8.265314418947494e-05,
+ "loss": 0.4496,
+ "step": 68800
+ },
+ {
+ "epoch": 0.3552246660578575,
+ "grad_norm": 26871.888671875,
+ "learning_rate": 8.262547106212541e-05,
+ "loss": 0.446,
+ "step": 68850
+ },
+ {
+ "epoch": 0.3554826360404703,
+ "grad_norm": 21342.556640625,
+ "learning_rate": 8.259778051993118e-05,
+ "loss": 0.4525,
+ "step": 68900
+ },
+ {
+ "epoch": 0.35574060602308316,
+ "grad_norm": 23054.814453125,
+ "learning_rate": 8.25700725776729e-05,
+ "loss": 0.4427,
+ "step": 68950
+ },
+ {
+ "epoch": 0.35599857600569595,
+ "grad_norm": 20473.818359375,
+ "learning_rate": 8.254234725014061e-05,
+ "loss": 0.4452,
+ "step": 69000
+ },
+ {
+ "epoch": 0.3562565459883088,
+ "grad_norm": 22081.576171875,
+ "learning_rate": 8.251460455213347e-05,
+ "loss": 0.4533,
+ "step": 69050
+ },
+ {
+ "epoch": 0.3565145159709216,
+ "grad_norm": 21840.048828125,
+ "learning_rate": 8.248684449846004e-05,
+ "loss": 0.4503,
+ "step": 69100
+ },
+ {
+ "epoch": 0.35677248595353445,
+ "grad_norm": 21595.234375,
+ "learning_rate": 8.245906710393808e-05,
+ "loss": 0.4459,
+ "step": 69150
+ },
+ {
+ "epoch": 0.3570304559361473,
+ "grad_norm": 22540.302734375,
+ "learning_rate": 8.243127238339463e-05,
+ "loss": 0.4461,
+ "step": 69200
+ },
+ {
+ "epoch": 0.3572884259187601,
+ "grad_norm": 20646.5859375,
+ "learning_rate": 8.2403460351666e-05,
+ "loss": 0.4522,
+ "step": 69250
+ },
+ {
+ "epoch": 0.35754639590137294,
+ "grad_norm": 20219.978515625,
+ "learning_rate": 8.237563102359767e-05,
+ "loss": 0.4464,
+ "step": 69300
+ },
+ {
+ "epoch": 0.35780436588398573,
+ "grad_norm": 21399.888671875,
+ "learning_rate": 8.234778441404441e-05,
+ "loss": 0.451,
+ "step": 69350
+ },
+ {
+ "epoch": 0.3580623358665986,
+ "grad_norm": 23263.193359375,
+ "learning_rate": 8.231992053787024e-05,
+ "loss": 0.4491,
+ "step": 69400
+ },
+ {
+ "epoch": 0.3583203058492114,
+ "grad_norm": 20740.455078125,
+ "learning_rate": 8.229203940994829e-05,
+ "loss": 0.4456,
+ "step": 69450
+ },
+ {
+ "epoch": 0.3585782758318242,
+ "grad_norm": 21715.078125,
+ "learning_rate": 8.226414104516102e-05,
+ "loss": 0.4467,
+ "step": 69500
+ },
+ {
+ "epoch": 0.358836245814437,
+ "grad_norm": 19771.517578125,
+ "learning_rate": 8.223622545840001e-05,
+ "loss": 0.4505,
+ "step": 69550
+ },
+ {
+ "epoch": 0.35909421579704986,
+ "grad_norm": 20944.298828125,
+ "learning_rate": 8.220829266456608e-05,
+ "loss": 0.4481,
+ "step": 69600
+ },
+ {
+ "epoch": 0.35935218577966266,
+ "grad_norm": 22313.017578125,
+ "learning_rate": 8.21803426785692e-05,
+ "loss": 0.4503,
+ "step": 69650
+ },
+ {
+ "epoch": 0.3596101557622755,
+ "grad_norm": 22525.5859375,
+ "learning_rate": 8.215237551532853e-05,
+ "loss": 0.4488,
+ "step": 69700
+ },
+ {
+ "epoch": 0.3598681257448883,
+ "grad_norm": 22731.85546875,
+ "learning_rate": 8.21243911897724e-05,
+ "loss": 0.4476,
+ "step": 69750
+ },
+ {
+ "epoch": 0.36012609572750115,
+ "grad_norm": 20872.9375,
+ "learning_rate": 8.20963897168383e-05,
+ "loss": 0.4485,
+ "step": 69800
+ },
+ {
+ "epoch": 0.360384065710114,
+ "grad_norm": 21066.095703125,
+ "learning_rate": 8.206837111147289e-05,
+ "loss": 0.4511,
+ "step": 69850
+ },
+ {
+ "epoch": 0.3606420356927268,
+ "grad_norm": 21823.62890625,
+ "learning_rate": 8.204033538863197e-05,
+ "loss": 0.4415,
+ "step": 69900
+ },
+ {
+ "epoch": 0.36090000567533964,
+ "grad_norm": 19639.724609375,
+ "learning_rate": 8.201228256328042e-05,
+ "loss": 0.4456,
+ "step": 69950
+ },
+ {
+ "epoch": 0.36115797565795243,
+ "grad_norm": 25321.20703125,
+ "learning_rate": 8.198421265039231e-05,
+ "loss": 0.4506,
+ "step": 70000
+ },
+ {
+ "epoch": 0.36115797565795243,
+ "eval_loss": 0.43597322702407837,
+ "eval_runtime": 3285.9769,
+ "eval_samples_per_second": 943.744,
+ "eval_steps_per_second": 1.843,
+ "step": 70000
+ },
+ {
+ "epoch": 0.3614159456405653,
+ "grad_norm": 19558.943359375,
+ "learning_rate": 8.195612566495084e-05,
+ "loss": 0.4502,
+ "step": 70050
+ },
+ {
+ "epoch": 0.3616739156231781,
+ "grad_norm": 21766.482421875,
+ "learning_rate": 8.192802162194828e-05,
+ "loss": 0.4444,
+ "step": 70100
+ },
+ {
+ "epoch": 0.3619318856057909,
+ "grad_norm": 23117.017578125,
+ "learning_rate": 8.189990053638603e-05,
+ "loss": 0.4476,
+ "step": 70150
+ },
+ {
+ "epoch": 0.3621898555884037,
+ "grad_norm": 19175.60546875,
+ "learning_rate": 8.18717624232746e-05,
+ "loss": 0.4479,
+ "step": 70200
+ },
+ {
+ "epoch": 0.36244782557101657,
+ "grad_norm": 22124.80078125,
+ "learning_rate": 8.184360729763351e-05,
+ "loss": 0.449,
+ "step": 70250
+ },
+ {
+ "epoch": 0.36270579555362936,
+ "grad_norm": 21717.501953125,
+ "learning_rate": 8.181543517449147e-05,
+ "loss": 0.4488,
+ "step": 70300
+ },
+ {
+ "epoch": 0.3629637655362422,
+ "grad_norm": 20235.162109375,
+ "learning_rate": 8.178724606888621e-05,
+ "loss": 0.4496,
+ "step": 70350
+ },
+ {
+ "epoch": 0.363221735518855,
+ "grad_norm": 22513.677734375,
+ "learning_rate": 8.175903999586455e-05,
+ "loss": 0.4463,
+ "step": 70400
+ },
+ {
+ "epoch": 0.36347970550146785,
+ "grad_norm": 21388.1953125,
+ "learning_rate": 8.173081697048228e-05,
+ "loss": 0.4446,
+ "step": 70450
+ },
+ {
+ "epoch": 0.36373767548408065,
+ "grad_norm": 20549.271484375,
+ "learning_rate": 8.170257700780435e-05,
+ "loss": 0.4421,
+ "step": 70500
+ },
+ {
+ "epoch": 0.3639956454666935,
+ "grad_norm": 21219.158203125,
+ "learning_rate": 8.16743201229047e-05,
+ "loss": 0.4472,
+ "step": 70550
+ },
+ {
+ "epoch": 0.36425361544930634,
+ "grad_norm": 20570.34375,
+ "learning_rate": 8.164604633086632e-05,
+ "loss": 0.4487,
+ "step": 70600
+ },
+ {
+ "epoch": 0.36451158543191914,
+ "grad_norm": 17376.671875,
+ "learning_rate": 8.161775564678118e-05,
+ "loss": 0.4413,
+ "step": 70650
+ },
+ {
+ "epoch": 0.364769555414532,
+ "grad_norm": 21676.33984375,
+ "learning_rate": 8.158944808575032e-05,
+ "loss": 0.4433,
+ "step": 70700
+ },
+ {
+ "epoch": 0.3650275253971448,
+ "grad_norm": 21901.001953125,
+ "learning_rate": 8.156112366288378e-05,
+ "loss": 0.4465,
+ "step": 70750
+ },
+ {
+ "epoch": 0.36528549537975763,
+ "grad_norm": 20330.720703125,
+ "learning_rate": 8.153278239330056e-05,
+ "loss": 0.4456,
+ "step": 70800
+ },
+ {
+ "epoch": 0.3655434653623704,
+ "grad_norm": 22179.904296875,
+ "learning_rate": 8.15044242921287e-05,
+ "loss": 0.4465,
+ "step": 70850
+ },
+ {
+ "epoch": 0.3658014353449833,
+ "grad_norm": 21384.66015625,
+ "learning_rate": 8.14760493745052e-05,
+ "loss": 0.4476,
+ "step": 70900
+ },
+ {
+ "epoch": 0.36605940532759607,
+ "grad_norm": 21706.103515625,
+ "learning_rate": 8.144765765557604e-05,
+ "loss": 0.4475,
+ "step": 70950
+ },
+ {
+ "epoch": 0.3663173753102089,
+ "grad_norm": 20332.5,
+ "learning_rate": 8.141924915049617e-05,
+ "loss": 0.449,
+ "step": 71000
+ },
+ {
+ "epoch": 0.3665753452928217,
+ "grad_norm": 22648.640625,
+ "learning_rate": 8.139082387442951e-05,
+ "loss": 0.4566,
+ "step": 71050
+ },
+ {
+ "epoch": 0.36683331527543456,
+ "grad_norm": 21496.291015625,
+ "learning_rate": 8.136238184254892e-05,
+ "loss": 0.4493,
+ "step": 71100
+ },
+ {
+ "epoch": 0.36709128525804735,
+ "grad_norm": 22114.169921875,
+ "learning_rate": 8.133392307003618e-05,
+ "loss": 0.4441,
+ "step": 71150
+ },
+ {
+ "epoch": 0.3673492552406602,
+ "grad_norm": 22476.390625,
+ "learning_rate": 8.130544757208205e-05,
+ "loss": 0.4391,
+ "step": 71200
+ },
+ {
+ "epoch": 0.367607225223273,
+ "grad_norm": 22175.044921875,
+ "learning_rate": 8.127695536388623e-05,
+ "loss": 0.4439,
+ "step": 71250
+ },
+ {
+ "epoch": 0.36786519520588584,
+ "grad_norm": 19715.728515625,
+ "learning_rate": 8.124844646065724e-05,
+ "loss": 0.448,
+ "step": 71300
+ },
+ {
+ "epoch": 0.3681231651884987,
+ "grad_norm": 19609.146484375,
+ "learning_rate": 8.121992087761266e-05,
+ "loss": 0.4476,
+ "step": 71350
+ },
+ {
+ "epoch": 0.3683811351711115,
+ "grad_norm": 21872.12890625,
+ "learning_rate": 8.119137862997883e-05,
+ "loss": 0.4536,
+ "step": 71400
+ },
+ {
+ "epoch": 0.36863910515372433,
+ "grad_norm": 19710.619140625,
+ "learning_rate": 8.116281973299107e-05,
+ "loss": 0.4466,
+ "step": 71450
+ },
+ {
+ "epoch": 0.3688970751363371,
+ "grad_norm": 21783.138671875,
+ "learning_rate": 8.113424420189357e-05,
+ "loss": 0.4422,
+ "step": 71500
+ },
+ {
+ "epoch": 0.36915504511895,
+ "grad_norm": 20527.984375,
+ "learning_rate": 8.110565205193941e-05,
+ "loss": 0.4499,
+ "step": 71550
+ },
+ {
+ "epoch": 0.36941301510156277,
+ "grad_norm": 21693.171875,
+ "learning_rate": 8.10770432983905e-05,
+ "loss": 0.4465,
+ "step": 71600
+ },
+ {
+ "epoch": 0.3696709850841756,
+ "grad_norm": 19817.142578125,
+ "learning_rate": 8.104841795651765e-05,
+ "loss": 0.4471,
+ "step": 71650
+ },
+ {
+ "epoch": 0.3699289550667884,
+ "grad_norm": 20883.767578125,
+ "learning_rate": 8.101977604160052e-05,
+ "loss": 0.4507,
+ "step": 71700
+ },
+ {
+ "epoch": 0.37018692504940126,
+ "grad_norm": 21206.943359375,
+ "learning_rate": 8.099111756892759e-05,
+ "loss": 0.4415,
+ "step": 71750
+ },
+ {
+ "epoch": 0.37044489503201405,
+ "grad_norm": 21431.19140625,
+ "learning_rate": 8.096244255379621e-05,
+ "loss": 0.4542,
+ "step": 71800
+ },
+ {
+ "epoch": 0.3707028650146269,
+ "grad_norm": 23020.34375,
+ "learning_rate": 8.093375101151255e-05,
+ "loss": 0.4481,
+ "step": 71850
+ },
+ {
+ "epoch": 0.3709608349972397,
+ "grad_norm": 20704.1171875,
+ "learning_rate": 8.09050429573916e-05,
+ "loss": 0.4427,
+ "step": 71900
+ },
+ {
+ "epoch": 0.37121880497985255,
+ "grad_norm": 20195.037109375,
+ "learning_rate": 8.087631840675715e-05,
+ "loss": 0.4416,
+ "step": 71950
+ },
+ {
+ "epoch": 0.3714767749624654,
+ "grad_norm": 21187.99609375,
+ "learning_rate": 8.084757737494184e-05,
+ "loss": 0.452,
+ "step": 72000
+ },
+ {
+ "epoch": 0.3717347449450782,
+ "grad_norm": 20694.912109375,
+ "learning_rate": 8.081881987728703e-05,
+ "loss": 0.4416,
+ "step": 72050
+ },
+ {
+ "epoch": 0.37199271492769104,
+ "grad_norm": 23006.939453125,
+ "learning_rate": 8.079004592914297e-05,
+ "loss": 0.4426,
+ "step": 72100
+ },
+ {
+ "epoch": 0.37225068491030383,
+ "grad_norm": 21854.025390625,
+ "learning_rate": 8.076125554586859e-05,
+ "loss": 0.4453,
+ "step": 72150
+ },
+ {
+ "epoch": 0.3725086548929167,
+ "grad_norm": 19155.400390625,
+ "learning_rate": 8.073244874283166e-05,
+ "loss": 0.4539,
+ "step": 72200
+ },
+ {
+ "epoch": 0.3727666248755295,
+ "grad_norm": 22085.5625,
+ "learning_rate": 8.070362553540869e-05,
+ "loss": 0.4474,
+ "step": 72250
+ },
+ {
+ "epoch": 0.3730245948581423,
+ "grad_norm": 21225.626953125,
+ "learning_rate": 8.067478593898495e-05,
+ "loss": 0.4431,
+ "step": 72300
+ },
+ {
+ "epoch": 0.3732825648407551,
+ "grad_norm": 21605.546875,
+ "learning_rate": 8.064592996895446e-05,
+ "loss": 0.4534,
+ "step": 72350
+ },
+ {
+ "epoch": 0.37354053482336796,
+ "grad_norm": 20774.87109375,
+ "learning_rate": 8.061705764071999e-05,
+ "loss": 0.4462,
+ "step": 72400
+ },
+ {
+ "epoch": 0.37379850480598076,
+ "grad_norm": 21871.390625,
+ "learning_rate": 8.0588168969693e-05,
+ "loss": 0.4445,
+ "step": 72450
+ },
+ {
+ "epoch": 0.3740564747885936,
+ "grad_norm": 22102.560546875,
+ "learning_rate": 8.05592639712937e-05,
+ "loss": 0.4478,
+ "step": 72500
+ },
+ {
+ "epoch": 0.3743144447712064,
+ "grad_norm": 21172.283203125,
+ "learning_rate": 8.053034266095105e-05,
+ "loss": 0.4469,
+ "step": 72550
+ },
+ {
+ "epoch": 0.37457241475381925,
+ "grad_norm": 21827.390625,
+ "learning_rate": 8.050140505410268e-05,
+ "loss": 0.4485,
+ "step": 72600
+ },
+ {
+ "epoch": 0.37483038473643204,
+ "grad_norm": 21271.87890625,
+ "learning_rate": 8.047245116619492e-05,
+ "loss": 0.45,
+ "step": 72650
+ },
+ {
+ "epoch": 0.3750883547190449,
+ "grad_norm": 21192.6484375,
+ "learning_rate": 8.04434810126828e-05,
+ "loss": 0.442,
+ "step": 72700
+ },
+ {
+ "epoch": 0.37534632470165774,
+ "grad_norm": 21529.736328125,
+ "learning_rate": 8.041449460903001e-05,
+ "loss": 0.4462,
+ "step": 72750
+ },
+ {
+ "epoch": 0.37560429468427053,
+ "grad_norm": 18609.474609375,
+ "learning_rate": 8.038549197070893e-05,
+ "loss": 0.4436,
+ "step": 72800
+ },
+ {
+ "epoch": 0.3758622646668834,
+ "grad_norm": 21631.82421875,
+ "learning_rate": 8.035647311320062e-05,
+ "loss": 0.4507,
+ "step": 72850
+ },
+ {
+ "epoch": 0.3761202346494962,
+ "grad_norm": 22347.056640625,
+ "learning_rate": 8.03274380519948e-05,
+ "loss": 0.4472,
+ "step": 72900
+ },
+ {
+ "epoch": 0.376378204632109,
+ "grad_norm": 20416.37109375,
+ "learning_rate": 8.029838680258979e-05,
+ "loss": 0.4475,
+ "step": 72950
+ },
+ {
+ "epoch": 0.3766361746147218,
+ "grad_norm": 21952.27734375,
+ "learning_rate": 8.026931938049259e-05,
+ "loss": 0.4449,
+ "step": 73000
+ },
+ {
+ "epoch": 0.37689414459733467,
+ "grad_norm": 23068.12109375,
+ "learning_rate": 8.024023580121885e-05,
+ "loss": 0.4477,
+ "step": 73050
+ },
+ {
+ "epoch": 0.37715211457994746,
+ "grad_norm": 21956.462890625,
+ "learning_rate": 8.021113608029281e-05,
+ "loss": 0.4459,
+ "step": 73100
+ },
+ {
+ "epoch": 0.3774100845625603,
+ "grad_norm": 20933.28125,
+ "learning_rate": 8.018202023324733e-05,
+ "loss": 0.4481,
+ "step": 73150
+ },
+ {
+ "epoch": 0.3776680545451731,
+ "grad_norm": 23138.638671875,
+ "learning_rate": 8.015288827562389e-05,
+ "loss": 0.437,
+ "step": 73200
+ },
+ {
+ "epoch": 0.37792602452778595,
+ "grad_norm": 20973.119140625,
+ "learning_rate": 8.012374022297255e-05,
+ "loss": 0.4454,
+ "step": 73250
+ },
+ {
+ "epoch": 0.37818399451039875,
+ "grad_norm": 21328.29296875,
+ "learning_rate": 8.0094576090852e-05,
+ "loss": 0.4426,
+ "step": 73300
+ },
+ {
+ "epoch": 0.3784419644930116,
+ "grad_norm": 20653.591796875,
+ "learning_rate": 8.006539589482949e-05,
+ "loss": 0.4448,
+ "step": 73350
+ },
+ {
+ "epoch": 0.3786999344756244,
+ "grad_norm": 21520.181640625,
+ "learning_rate": 8.003619965048083e-05,
+ "loss": 0.4428,
+ "step": 73400
+ },
+ {
+ "epoch": 0.37895790445823724,
+ "grad_norm": 20736.89453125,
+ "learning_rate": 8.000698737339041e-05,
+ "loss": 0.4483,
+ "step": 73450
+ },
+ {
+ "epoch": 0.3792158744408501,
+ "grad_norm": 23887.587890625,
+ "learning_rate": 7.997775907915118e-05,
+ "loss": 0.4518,
+ "step": 73500
+ },
+ {
+ "epoch": 0.3794738444234629,
+ "grad_norm": 23771.8671875,
+ "learning_rate": 7.994851478336465e-05,
+ "loss": 0.4479,
+ "step": 73550
+ },
+ {
+ "epoch": 0.37973181440607573,
+ "grad_norm": 21563.27734375,
+ "learning_rate": 7.991925450164084e-05,
+ "loss": 0.4433,
+ "step": 73600
+ },
+ {
+ "epoch": 0.3799897843886885,
+ "grad_norm": 21403.751953125,
+ "learning_rate": 7.988997824959832e-05,
+ "loss": 0.4443,
+ "step": 73650
+ },
+ {
+ "epoch": 0.38024775437130137,
+ "grad_norm": 22136.51171875,
+ "learning_rate": 7.986068604286421e-05,
+ "loss": 0.446,
+ "step": 73700
+ },
+ {
+ "epoch": 0.38050572435391417,
+ "grad_norm": 22143.857421875,
+ "learning_rate": 7.98313778970741e-05,
+ "loss": 0.4416,
+ "step": 73750
+ },
+ {
+ "epoch": 0.380763694336527,
+ "grad_norm": 22035.1171875,
+ "learning_rate": 7.980205382787211e-05,
+ "loss": 0.4413,
+ "step": 73800
+ },
+ {
+ "epoch": 0.3810216643191398,
+ "grad_norm": 21744.25390625,
+ "learning_rate": 7.97727138509109e-05,
+ "loss": 0.4463,
+ "step": 73850
+ },
+ {
+ "epoch": 0.38127963430175266,
+ "grad_norm": 21739.26171875,
+ "learning_rate": 7.974335798185153e-05,
+ "loss": 0.4415,
+ "step": 73900
+ },
+ {
+ "epoch": 0.38153760428436545,
+ "grad_norm": 20974.59765625,
+ "learning_rate": 7.971398623636361e-05,
+ "loss": 0.4457,
+ "step": 73950
+ },
+ {
+ "epoch": 0.3817955742669783,
+ "grad_norm": 19807.79296875,
+ "learning_rate": 7.968459863012523e-05,
+ "loss": 0.4423,
+ "step": 74000
+ },
+ {
+ "epoch": 0.3820535442495911,
+ "grad_norm": 21711.158203125,
+ "learning_rate": 7.96551951788229e-05,
+ "loss": 0.4466,
+ "step": 74050
+ },
+ {
+ "epoch": 0.38231151423220394,
+ "grad_norm": 19187.47265625,
+ "learning_rate": 7.962577589815163e-05,
+ "loss": 0.4387,
+ "step": 74100
+ },
+ {
+ "epoch": 0.3825694842148168,
+ "grad_norm": 19402.611328125,
+ "learning_rate": 7.959634080381486e-05,
+ "loss": 0.444,
+ "step": 74150
+ },
+ {
+ "epoch": 0.3828274541974296,
+ "grad_norm": 21287.9765625,
+ "learning_rate": 7.956688991152445e-05,
+ "loss": 0.4386,
+ "step": 74200
+ },
+ {
+ "epoch": 0.38308542418004243,
+ "grad_norm": 20430.591796875,
+ "learning_rate": 7.953742323700075e-05,
+ "loss": 0.4453,
+ "step": 74250
+ },
+ {
+ "epoch": 0.3833433941626552,
+ "grad_norm": 23246.041015625,
+ "learning_rate": 7.950794079597248e-05,
+ "loss": 0.4448,
+ "step": 74300
+ },
+ {
+ "epoch": 0.3836013641452681,
+ "grad_norm": 23098.74609375,
+ "learning_rate": 7.94784426041768e-05,
+ "loss": 0.4449,
+ "step": 74350
+ },
+ {
+ "epoch": 0.38385933412788087,
+ "grad_norm": 21504.71484375,
+ "learning_rate": 7.944892867735929e-05,
+ "loss": 0.4423,
+ "step": 74400
+ },
+ {
+ "epoch": 0.3841173041104937,
+ "grad_norm": 20115.0859375,
+ "learning_rate": 7.941939903127386e-05,
+ "loss": 0.4462,
+ "step": 74450
+ },
+ {
+ "epoch": 0.3843752740931065,
+ "grad_norm": 20473.681640625,
+ "learning_rate": 7.938985368168293e-05,
+ "loss": 0.4541,
+ "step": 74500
+ },
+ {
+ "epoch": 0.38463324407571936,
+ "grad_norm": 19664.6640625,
+ "learning_rate": 7.93602926443572e-05,
+ "loss": 0.4439,
+ "step": 74550
+ },
+ {
+ "epoch": 0.38489121405833215,
+ "grad_norm": 20806.474609375,
+ "learning_rate": 7.933071593507579e-05,
+ "loss": 0.439,
+ "step": 74600
+ },
+ {
+ "epoch": 0.385149184040945,
+ "grad_norm": 20905.197265625,
+ "learning_rate": 7.930112356962618e-05,
+ "loss": 0.444,
+ "step": 74650
+ },
+ {
+ "epoch": 0.3854071540235578,
+ "grad_norm": 26333.470703125,
+ "learning_rate": 7.927151556380417e-05,
+ "loss": 0.4462,
+ "step": 74700
+ },
+ {
+ "epoch": 0.38566512400617065,
+ "grad_norm": 20478.18359375,
+ "learning_rate": 7.924189193341396e-05,
+ "loss": 0.4456,
+ "step": 74750
+ },
+ {
+ "epoch": 0.38592309398878344,
+ "grad_norm": 20605.662109375,
+ "learning_rate": 7.921225269426808e-05,
+ "loss": 0.4412,
+ "step": 74800
+ },
+ {
+ "epoch": 0.3861810639713963,
+ "grad_norm": 23029.943359375,
+ "learning_rate": 7.918259786218738e-05,
+ "loss": 0.4427,
+ "step": 74850
+ },
+ {
+ "epoch": 0.38643903395400914,
+ "grad_norm": 23275.130859375,
+ "learning_rate": 7.915292745300103e-05,
+ "loss": 0.4436,
+ "step": 74900
+ },
+ {
+ "epoch": 0.38669700393662193,
+ "grad_norm": 22123.671875,
+ "learning_rate": 7.91232414825465e-05,
+ "loss": 0.4456,
+ "step": 74950
+ },
+ {
+ "epoch": 0.3869549739192348,
+ "grad_norm": 22476.365234375,
+ "learning_rate": 7.909353996666961e-05,
+ "loss": 0.4424,
+ "step": 75000
+ },
+ {
+ "epoch": 0.3869549739192348,
+ "eval_loss": 0.43277591466903687,
+ "eval_runtime": 3260.4686,
+ "eval_samples_per_second": 951.127,
+ "eval_steps_per_second": 1.858,
+ "step": 75000
+ },
+ {
+ "epoch": 0.3872129439018476,
+ "grad_norm": 22150.966796875,
+ "learning_rate": 7.906382292122448e-05,
+ "loss": 0.4407,
+ "step": 75050
+ },
+ {
+ "epoch": 0.3874709138844604,
+ "grad_norm": 20100.5625,
+ "learning_rate": 7.903409036207343e-05,
+ "loss": 0.4443,
+ "step": 75100
+ },
+ {
+ "epoch": 0.3877288838670732,
+ "grad_norm": 22078.353515625,
+ "learning_rate": 7.900434230508715e-05,
+ "loss": 0.4468,
+ "step": 75150
+ },
+ {
+ "epoch": 0.38798685384968606,
+ "grad_norm": 20395.498046875,
+ "learning_rate": 7.897457876614461e-05,
+ "loss": 0.4424,
+ "step": 75200
+ },
+ {
+ "epoch": 0.38824482383229886,
+ "grad_norm": 23190.4140625,
+ "learning_rate": 7.894479976113298e-05,
+ "loss": 0.4394,
+ "step": 75250
+ },
+ {
+ "epoch": 0.3885027938149117,
+ "grad_norm": 21523.7265625,
+ "learning_rate": 7.891500530594771e-05,
+ "loss": 0.4441,
+ "step": 75300
+ },
+ {
+ "epoch": 0.3887607637975245,
+ "grad_norm": 22941.23828125,
+ "learning_rate": 7.888519541649253e-05,
+ "loss": 0.443,
+ "step": 75350
+ },
+ {
+ "epoch": 0.38901873378013735,
+ "grad_norm": 21467.90234375,
+ "learning_rate": 7.885537010867936e-05,
+ "loss": 0.4478,
+ "step": 75400
+ },
+ {
+ "epoch": 0.38927670376275014,
+ "grad_norm": 22635.732421875,
+ "learning_rate": 7.882552939842837e-05,
+ "loss": 0.4415,
+ "step": 75450
+ },
+ {
+ "epoch": 0.389534673745363,
+ "grad_norm": 21242.326171875,
+ "learning_rate": 7.879567330166797e-05,
+ "loss": 0.4352,
+ "step": 75500
+ },
+ {
+ "epoch": 0.38979264372797584,
+ "grad_norm": 20005.158203125,
+ "learning_rate": 7.876580183433475e-05,
+ "loss": 0.4393,
+ "step": 75550
+ },
+ {
+ "epoch": 0.39005061371058863,
+ "grad_norm": 23355.044921875,
+ "learning_rate": 7.873591501237351e-05,
+ "loss": 0.4465,
+ "step": 75600
+ },
+ {
+ "epoch": 0.3903085836932015,
+ "grad_norm": 21217.359375,
+ "learning_rate": 7.870601285173731e-05,
+ "loss": 0.4437,
+ "step": 75650
+ },
+ {
+ "epoch": 0.3905665536758143,
+ "grad_norm": 22424.580078125,
+ "learning_rate": 7.867609536838729e-05,
+ "loss": 0.4397,
+ "step": 75700
+ },
+ {
+ "epoch": 0.3908245236584271,
+ "grad_norm": 20943.65234375,
+ "learning_rate": 7.864616257829285e-05,
+ "loss": 0.4427,
+ "step": 75750
+ },
+ {
+ "epoch": 0.3910824936410399,
+ "grad_norm": 23246.5625,
+ "learning_rate": 7.861621449743152e-05,
+ "loss": 0.4479,
+ "step": 75800
+ },
+ {
+ "epoch": 0.39134046362365277,
+ "grad_norm": 21575.830078125,
+ "learning_rate": 7.858625114178902e-05,
+ "loss": 0.4384,
+ "step": 75850
+ },
+ {
+ "epoch": 0.39159843360626556,
+ "grad_norm": 22053.5546875,
+ "learning_rate": 7.855627252735918e-05,
+ "loss": 0.4364,
+ "step": 75900
+ },
+ {
+ "epoch": 0.3918564035888784,
+ "grad_norm": 21934.55078125,
+ "learning_rate": 7.852627867014406e-05,
+ "loss": 0.4466,
+ "step": 75950
+ },
+ {
+ "epoch": 0.3921143735714912,
+ "grad_norm": 20184.078125,
+ "learning_rate": 7.849626958615374e-05,
+ "loss": 0.4422,
+ "step": 76000
+ },
+ {
+ "epoch": 0.39237234355410405,
+ "grad_norm": 21770.923828125,
+ "learning_rate": 7.846624529140652e-05,
+ "loss": 0.4382,
+ "step": 76050
+ },
+ {
+ "epoch": 0.39263031353671685,
+ "grad_norm": 21592.16796875,
+ "learning_rate": 7.843620580192877e-05,
+ "loss": 0.4404,
+ "step": 76100
+ },
+ {
+ "epoch": 0.3928882835193297,
+ "grad_norm": 19634.1875,
+ "learning_rate": 7.8406151133755e-05,
+ "loss": 0.4443,
+ "step": 76150
+ },
+ {
+ "epoch": 0.3931462535019425,
+ "grad_norm": 24045.01171875,
+ "learning_rate": 7.837608130292782e-05,
+ "loss": 0.438,
+ "step": 76200
+ },
+ {
+ "epoch": 0.39340422348455534,
+ "grad_norm": 21739.921875,
+ "learning_rate": 7.83459963254979e-05,
+ "loss": 0.4474,
+ "step": 76250
+ },
+ {
+ "epoch": 0.3936621934671682,
+ "grad_norm": 20915.56640625,
+ "learning_rate": 7.831589621752405e-05,
+ "loss": 0.4463,
+ "step": 76300
+ },
+ {
+ "epoch": 0.393920163449781,
+ "grad_norm": 18799.80078125,
+ "learning_rate": 7.828578099507308e-05,
+ "loss": 0.4401,
+ "step": 76350
+ },
+ {
+ "epoch": 0.39417813343239383,
+ "grad_norm": 19029.51171875,
+ "learning_rate": 7.825565067421995e-05,
+ "loss": 0.4428,
+ "step": 76400
+ },
+ {
+ "epoch": 0.3944361034150066,
+ "grad_norm": 22817.376953125,
+ "learning_rate": 7.822550527104762e-05,
+ "loss": 0.4467,
+ "step": 76450
+ },
+ {
+ "epoch": 0.39469407339761947,
+ "grad_norm": 19165.529296875,
+ "learning_rate": 7.819534480164713e-05,
+ "loss": 0.4365,
+ "step": 76500
+ },
+ {
+ "epoch": 0.39495204338023226,
+ "grad_norm": 22980.056640625,
+ "learning_rate": 7.816516928211756e-05,
+ "loss": 0.4386,
+ "step": 76550
+ },
+ {
+ "epoch": 0.3952100133628451,
+ "grad_norm": 21261.7109375,
+ "learning_rate": 7.813497872856603e-05,
+ "loss": 0.4358,
+ "step": 76600
+ },
+ {
+ "epoch": 0.3954679833454579,
+ "grad_norm": 21533.779296875,
+ "learning_rate": 7.810477315710763e-05,
+ "loss": 0.4444,
+ "step": 76650
+ },
+ {
+ "epoch": 0.39572595332807076,
+ "grad_norm": 20503.556640625,
+ "learning_rate": 7.807455258386556e-05,
+ "loss": 0.4446,
+ "step": 76700
+ },
+ {
+ "epoch": 0.39598392331068355,
+ "grad_norm": 21180.939453125,
+ "learning_rate": 7.804431702497093e-05,
+ "loss": 0.4486,
+ "step": 76750
+ },
+ {
+ "epoch": 0.3962418932932964,
+ "grad_norm": 24126.484375,
+ "learning_rate": 7.801406649656294e-05,
+ "loss": 0.4419,
+ "step": 76800
+ },
+ {
+ "epoch": 0.3964998632759092,
+ "grad_norm": 19791.345703125,
+ "learning_rate": 7.79838010147887e-05,
+ "loss": 0.4499,
+ "step": 76850
+ },
+ {
+ "epoch": 0.39675783325852204,
+ "grad_norm": 21118.822265625,
+ "learning_rate": 7.795352059580334e-05,
+ "loss": 0.4403,
+ "step": 76900
+ },
+ {
+ "epoch": 0.39701580324113483,
+ "grad_norm": 20787.6015625,
+ "learning_rate": 7.792322525577e-05,
+ "loss": 0.4394,
+ "step": 76950
+ },
+ {
+ "epoch": 0.3972737732237477,
+ "grad_norm": 21575.86328125,
+ "learning_rate": 7.789291501085972e-05,
+ "loss": 0.4482,
+ "step": 77000
+ },
+ {
+ "epoch": 0.39753174320636053,
+ "grad_norm": 21271.287109375,
+ "learning_rate": 7.78625898772515e-05,
+ "loss": 0.4413,
+ "step": 77050
+ },
+ {
+ "epoch": 0.3977897131889733,
+ "grad_norm": 21294.7890625,
+ "learning_rate": 7.783224987113235e-05,
+ "loss": 0.4393,
+ "step": 77100
+ },
+ {
+ "epoch": 0.3980476831715862,
+ "grad_norm": 21880.341796875,
+ "learning_rate": 7.780189500869716e-05,
+ "loss": 0.4464,
+ "step": 77150
+ },
+ {
+ "epoch": 0.39830565315419897,
+ "grad_norm": 22501.482421875,
+ "learning_rate": 7.777152530614876e-05,
+ "loss": 0.4384,
+ "step": 77200
+ },
+ {
+ "epoch": 0.3985636231368118,
+ "grad_norm": 20404.89453125,
+ "learning_rate": 7.774114077969792e-05,
+ "loss": 0.4355,
+ "step": 77250
+ },
+ {
+ "epoch": 0.3988215931194246,
+ "grad_norm": 21435.66015625,
+ "learning_rate": 7.77107414455633e-05,
+ "loss": 0.4468,
+ "step": 77300
+ },
+ {
+ "epoch": 0.39907956310203746,
+ "grad_norm": 20239.091796875,
+ "learning_rate": 7.768032731997148e-05,
+ "loss": 0.4453,
+ "step": 77350
+ },
+ {
+ "epoch": 0.39933753308465025,
+ "grad_norm": 19040.37109375,
+ "learning_rate": 7.764989841915694e-05,
+ "loss": 0.4487,
+ "step": 77400
+ },
+ {
+ "epoch": 0.3995955030672631,
+ "grad_norm": 22501.13671875,
+ "learning_rate": 7.761945475936203e-05,
+ "loss": 0.4488,
+ "step": 77450
+ },
+ {
+ "epoch": 0.3998534730498759,
+ "grad_norm": 20773.27734375,
+ "learning_rate": 7.7588996356837e-05,
+ "loss": 0.4384,
+ "step": 77500
+ },
+ {
+ "epoch": 0.40011144303248874,
+ "grad_norm": 22598.4140625,
+ "learning_rate": 7.755852322783994e-05,
+ "loss": 0.4358,
+ "step": 77550
+ },
+ {
+ "epoch": 0.40036941301510154,
+ "grad_norm": 20656.033203125,
+ "learning_rate": 7.752803538863683e-05,
+ "loss": 0.4434,
+ "step": 77600
+ },
+ {
+ "epoch": 0.4006273829977144,
+ "grad_norm": 20882.3125,
+ "learning_rate": 7.749753285550146e-05,
+ "loss": 0.4408,
+ "step": 77650
+ },
+ {
+ "epoch": 0.40088535298032724,
+ "grad_norm": 19519.408203125,
+ "learning_rate": 7.746701564471553e-05,
+ "loss": 0.439,
+ "step": 77700
+ },
+ {
+ "epoch": 0.40114332296294003,
+ "grad_norm": 21141.80859375,
+ "learning_rate": 7.74364837725685e-05,
+ "loss": 0.4422,
+ "step": 77750
+ },
+ {
+ "epoch": 0.4014012929455529,
+ "grad_norm": 21487.45703125,
+ "learning_rate": 7.74059372553577e-05,
+ "loss": 0.429,
+ "step": 77800
+ },
+ {
+ "epoch": 0.4016592629281657,
+ "grad_norm": 19889.447265625,
+ "learning_rate": 7.737537610938829e-05,
+ "loss": 0.4474,
+ "step": 77850
+ },
+ {
+ "epoch": 0.4019172329107785,
+ "grad_norm": 21914.947265625,
+ "learning_rate": 7.73448003509732e-05,
+ "loss": 0.4403,
+ "step": 77900
+ },
+ {
+ "epoch": 0.4021752028933913,
+ "grad_norm": 24025.521484375,
+ "learning_rate": 7.731420999643319e-05,
+ "loss": 0.4432,
+ "step": 77950
+ },
+ {
+ "epoch": 0.40243317287600416,
+ "grad_norm": 19703.50390625,
+ "learning_rate": 7.728360506209679e-05,
+ "loss": 0.443,
+ "step": 78000
+ },
+ {
+ "epoch": 0.40269114285861696,
+ "grad_norm": 21566.37890625,
+ "learning_rate": 7.725298556430034e-05,
+ "loss": 0.448,
+ "step": 78050
+ },
+ {
+ "epoch": 0.4029491128412298,
+ "grad_norm": 21902.564453125,
+ "learning_rate": 7.72223515193879e-05,
+ "loss": 0.438,
+ "step": 78100
+ },
+ {
+ "epoch": 0.4032070828238426,
+ "grad_norm": 20892.7578125,
+ "learning_rate": 7.719170294371136e-05,
+ "loss": 0.4382,
+ "step": 78150
+ },
+ {
+ "epoch": 0.40346505280645545,
+ "grad_norm": 21648.673828125,
+ "learning_rate": 7.716103985363033e-05,
+ "loss": 0.4378,
+ "step": 78200
+ },
+ {
+ "epoch": 0.40372302278906824,
+ "grad_norm": 23124.40625,
+ "learning_rate": 7.713036226551215e-05,
+ "loss": 0.442,
+ "step": 78250
+ },
+ {
+ "epoch": 0.4039809927716811,
+ "grad_norm": 25006.751953125,
+ "learning_rate": 7.709967019573195e-05,
+ "loss": 0.4397,
+ "step": 78300
+ },
+ {
+ "epoch": 0.4042389627542939,
+ "grad_norm": 20722.802734375,
+ "learning_rate": 7.706896366067256e-05,
+ "loss": 0.4388,
+ "step": 78350
+ },
+ {
+ "epoch": 0.40449693273690673,
+ "grad_norm": 20202.013671875,
+ "learning_rate": 7.703824267672452e-05,
+ "loss": 0.4404,
+ "step": 78400
+ },
+ {
+ "epoch": 0.4047549027195196,
+ "grad_norm": 21261.9375,
+ "learning_rate": 7.700750726028609e-05,
+ "loss": 0.4369,
+ "step": 78450
+ },
+ {
+ "epoch": 0.4050128727021324,
+ "grad_norm": 25343.57421875,
+ "learning_rate": 7.69767574277633e-05,
+ "loss": 0.4444,
+ "step": 78500
+ },
+ {
+ "epoch": 0.4052708426847452,
+ "grad_norm": 20222.767578125,
+ "learning_rate": 7.694599319556972e-05,
+ "loss": 0.4425,
+ "step": 78550
+ },
+ {
+ "epoch": 0.405528812667358,
+ "grad_norm": 22934.466796875,
+ "learning_rate": 7.691521458012678e-05,
+ "loss": 0.4411,
+ "step": 78600
+ },
+ {
+ "epoch": 0.40578678264997087,
+ "grad_norm": 22235.30078125,
+ "learning_rate": 7.688442159786346e-05,
+ "loss": 0.4445,
+ "step": 78650
+ },
+ {
+ "epoch": 0.40604475263258366,
+ "grad_norm": 21313.986328125,
+ "learning_rate": 7.68536142652165e-05,
+ "loss": 0.4341,
+ "step": 78700
+ },
+ {
+ "epoch": 0.4063027226151965,
+ "grad_norm": 20130.53515625,
+ "learning_rate": 7.68227925986302e-05,
+ "loss": 0.4395,
+ "step": 78750
+ },
+ {
+ "epoch": 0.4065606925978093,
+ "grad_norm": 19342.740234375,
+ "learning_rate": 7.679195661455664e-05,
+ "loss": 0.4424,
+ "step": 78800
+ },
+ {
+ "epoch": 0.40681866258042215,
+ "grad_norm": 21876.705078125,
+ "learning_rate": 7.676110632945543e-05,
+ "loss": 0.4415,
+ "step": 78850
+ },
+ {
+ "epoch": 0.40707663256303495,
+ "grad_norm": 23199.501953125,
+ "learning_rate": 7.673024175979384e-05,
+ "loss": 0.4423,
+ "step": 78900
+ },
+ {
+ "epoch": 0.4073346025456478,
+ "grad_norm": 22781.091796875,
+ "learning_rate": 7.669936292204683e-05,
+ "loss": 0.4398,
+ "step": 78950
+ },
+ {
+ "epoch": 0.4075925725282606,
+ "grad_norm": 24025.9375,
+ "learning_rate": 7.666846983269688e-05,
+ "loss": 0.4326,
+ "step": 79000
+ },
+ {
+ "epoch": 0.40785054251087344,
+ "grad_norm": 20797.056640625,
+ "learning_rate": 7.663756250823413e-05,
+ "loss": 0.4388,
+ "step": 79050
+ },
+ {
+ "epoch": 0.40810851249348623,
+ "grad_norm": 25106.67578125,
+ "learning_rate": 7.660664096515632e-05,
+ "loss": 0.4385,
+ "step": 79100
+ },
+ {
+ "epoch": 0.4083664824760991,
+ "grad_norm": 22217.36328125,
+ "learning_rate": 7.657570521996877e-05,
+ "loss": 0.4455,
+ "step": 79150
+ },
+ {
+ "epoch": 0.40862445245871193,
+ "grad_norm": 21679.291015625,
+ "learning_rate": 7.654475528918439e-05,
+ "loss": 0.4409,
+ "step": 79200
+ },
+ {
+ "epoch": 0.4088824224413247,
+ "grad_norm": 20133.583984375,
+ "learning_rate": 7.651379118932364e-05,
+ "loss": 0.4391,
+ "step": 79250
+ },
+ {
+ "epoch": 0.40914039242393757,
+ "grad_norm": 23019.171875,
+ "learning_rate": 7.648281293691457e-05,
+ "loss": 0.446,
+ "step": 79300
+ },
+ {
+ "epoch": 0.40939836240655036,
+ "grad_norm": 24098.38671875,
+ "learning_rate": 7.645182054849276e-05,
+ "loss": 0.4417,
+ "step": 79350
+ },
+ {
+ "epoch": 0.4096563323891632,
+ "grad_norm": 23057.240234375,
+ "learning_rate": 7.642081404060136e-05,
+ "loss": 0.4424,
+ "step": 79400
+ },
+ {
+ "epoch": 0.409914302371776,
+ "grad_norm": 20033.328125,
+ "learning_rate": 7.638979342979103e-05,
+ "loss": 0.4386,
+ "step": 79450
+ },
+ {
+ "epoch": 0.41017227235438886,
+ "grad_norm": 20978.68359375,
+ "learning_rate": 7.635875873261995e-05,
+ "loss": 0.4363,
+ "step": 79500
+ },
+ {
+ "epoch": 0.41043024233700165,
+ "grad_norm": 21347.068359375,
+ "learning_rate": 7.63277099656539e-05,
+ "loss": 0.4431,
+ "step": 79550
+ },
+ {
+ "epoch": 0.4106882123196145,
+ "grad_norm": 22031.8125,
+ "learning_rate": 7.629664714546604e-05,
+ "loss": 0.4313,
+ "step": 79600
+ },
+ {
+ "epoch": 0.4109461823022273,
+ "grad_norm": 23963.99609375,
+ "learning_rate": 7.626557028863717e-05,
+ "loss": 0.4363,
+ "step": 79650
+ },
+ {
+ "epoch": 0.41120415228484014,
+ "grad_norm": 20183.259765625,
+ "learning_rate": 7.623447941175548e-05,
+ "loss": 0.4419,
+ "step": 79700
+ },
+ {
+ "epoch": 0.41146212226745293,
+ "grad_norm": 23588.68359375,
+ "learning_rate": 7.620337453141667e-05,
+ "loss": 0.4388,
+ "step": 79750
+ },
+ {
+ "epoch": 0.4117200922500658,
+ "grad_norm": 22210.7265625,
+ "learning_rate": 7.617225566422395e-05,
+ "loss": 0.442,
+ "step": 79800
+ },
+ {
+ "epoch": 0.41197806223267863,
+ "grad_norm": 18647.93359375,
+ "learning_rate": 7.614112282678794e-05,
+ "loss": 0.4349,
+ "step": 79850
+ },
+ {
+ "epoch": 0.4122360322152914,
+ "grad_norm": 20993.388671875,
+ "learning_rate": 7.610997603572675e-05,
+ "loss": 0.4386,
+ "step": 79900
+ },
+ {
+ "epoch": 0.4124940021979043,
+ "grad_norm": 23693.26171875,
+ "learning_rate": 7.607881530766596e-05,
+ "loss": 0.4385,
+ "step": 79950
+ },
+ {
+ "epoch": 0.41275197218051707,
+ "grad_norm": 22608.26953125,
+ "learning_rate": 7.604764065923852e-05,
+ "loss": 0.4415,
+ "step": 80000
+ },
+ {
+ "epoch": 0.41275197218051707,
+ "eval_loss": 0.4290848970413208,
+ "eval_runtime": 3332.9887,
+ "eval_samples_per_second": 930.432,
+ "eval_steps_per_second": 1.817,
+ "step": 80000
+ },
+ {
+ "epoch": 0.4130099421631299,
+ "grad_norm": 23348.44921875,
+ "learning_rate": 7.60164521070849e-05,
+ "loss": 0.4392,
+ "step": 80050
+ },
+ {
+ "epoch": 0.4132679121457427,
+ "grad_norm": 19942.9921875,
+ "learning_rate": 7.598524966785293e-05,
+ "loss": 0.4362,
+ "step": 80100
+ },
+ {
+ "epoch": 0.41352588212835556,
+ "grad_norm": 22776.587890625,
+ "learning_rate": 7.595403335819786e-05,
+ "loss": 0.4402,
+ "step": 80150
+ },
+ {
+ "epoch": 0.41378385211096835,
+ "grad_norm": 22519.923828125,
+ "learning_rate": 7.592280319478233e-05,
+ "loss": 0.4412,
+ "step": 80200
+ },
+ {
+ "epoch": 0.4140418220935812,
+ "grad_norm": 22480.52734375,
+ "learning_rate": 7.589155919427645e-05,
+ "loss": 0.4393,
+ "step": 80250
+ },
+ {
+ "epoch": 0.414299792076194,
+ "grad_norm": 20900.625,
+ "learning_rate": 7.586030137335762e-05,
+ "loss": 0.4344,
+ "step": 80300
+ },
+ {
+ "epoch": 0.41455776205880684,
+ "grad_norm": 21272.306640625,
+ "learning_rate": 7.582902974871069e-05,
+ "loss": 0.4385,
+ "step": 80350
+ },
+ {
+ "epoch": 0.41481573204141964,
+ "grad_norm": 21448.478515625,
+ "learning_rate": 7.57977443370278e-05,
+ "loss": 0.4395,
+ "step": 80400
+ },
+ {
+ "epoch": 0.4150737020240325,
+ "grad_norm": 21854.537109375,
+ "learning_rate": 7.576644515500855e-05,
+ "loss": 0.4411,
+ "step": 80450
+ },
+ {
+ "epoch": 0.4153316720066453,
+ "grad_norm": 21458.689453125,
+ "learning_rate": 7.573513221935979e-05,
+ "loss": 0.4429,
+ "step": 80500
+ },
+ {
+ "epoch": 0.41558964198925813,
+ "grad_norm": 21895.71875,
+ "learning_rate": 7.57038055467958e-05,
+ "loss": 0.4391,
+ "step": 80550
+ },
+ {
+ "epoch": 0.415847611971871,
+ "grad_norm": 23495.921875,
+ "learning_rate": 7.567246515403812e-05,
+ "loss": 0.4398,
+ "step": 80600
+ },
+ {
+ "epoch": 0.41610558195448377,
+ "grad_norm": 26117.8671875,
+ "learning_rate": 7.564111105781568e-05,
+ "loss": 0.4407,
+ "step": 80650
+ },
+ {
+ "epoch": 0.4163635519370966,
+ "grad_norm": 21881.818359375,
+ "learning_rate": 7.560974327486466e-05,
+ "loss": 0.4336,
+ "step": 80700
+ },
+ {
+ "epoch": 0.4166215219197094,
+ "grad_norm": 21309.1015625,
+ "learning_rate": 7.557836182192859e-05,
+ "loss": 0.4371,
+ "step": 80750
+ },
+ {
+ "epoch": 0.41687949190232226,
+ "grad_norm": 21723.498046875,
+ "learning_rate": 7.554696671575826e-05,
+ "loss": 0.4384,
+ "step": 80800
+ },
+ {
+ "epoch": 0.41713746188493506,
+ "grad_norm": 19767.9609375,
+ "learning_rate": 7.55155579731118e-05,
+ "loss": 0.4375,
+ "step": 80850
+ },
+ {
+ "epoch": 0.4173954318675479,
+ "grad_norm": 18992.958984375,
+ "learning_rate": 7.548413561075456e-05,
+ "loss": 0.4419,
+ "step": 80900
+ },
+ {
+ "epoch": 0.4176534018501607,
+ "grad_norm": 21593.255859375,
+ "learning_rate": 7.545269964545921e-05,
+ "loss": 0.4372,
+ "step": 80950
+ },
+ {
+ "epoch": 0.41791137183277355,
+ "grad_norm": 19369.3125,
+ "learning_rate": 7.542125009400565e-05,
+ "loss": 0.4402,
+ "step": 81000
+ },
+ {
+ "epoch": 0.41816934181538634,
+ "grad_norm": 20552.06640625,
+ "learning_rate": 7.538978697318105e-05,
+ "loss": 0.4418,
+ "step": 81050
+ },
+ {
+ "epoch": 0.4184273117979992,
+ "grad_norm": 21554.94140625,
+ "learning_rate": 7.53583102997798e-05,
+ "loss": 0.4406,
+ "step": 81100
+ },
+ {
+ "epoch": 0.418685281780612,
+ "grad_norm": 21098.296875,
+ "learning_rate": 7.532682009060356e-05,
+ "loss": 0.443,
+ "step": 81150
+ },
+ {
+ "epoch": 0.41894325176322483,
+ "grad_norm": 24148.71484375,
+ "learning_rate": 7.529531636246116e-05,
+ "loss": 0.4345,
+ "step": 81200
+ },
+ {
+ "epoch": 0.4192012217458376,
+ "grad_norm": 20404.298828125,
+ "learning_rate": 7.526379913216872e-05,
+ "loss": 0.4335,
+ "step": 81250
+ },
+ {
+ "epoch": 0.4194591917284505,
+ "grad_norm": 22061.607421875,
+ "learning_rate": 7.52322684165495e-05,
+ "loss": 0.4385,
+ "step": 81300
+ },
+ {
+ "epoch": 0.4197171617110633,
+ "grad_norm": 18455.380859375,
+ "learning_rate": 7.520072423243398e-05,
+ "loss": 0.4337,
+ "step": 81350
+ },
+ {
+ "epoch": 0.4199751316936761,
+ "grad_norm": 23344.2734375,
+ "learning_rate": 7.516916659665987e-05,
+ "loss": 0.4401,
+ "step": 81400
+ },
+ {
+ "epoch": 0.42023310167628897,
+ "grad_norm": 20872.77734375,
+ "learning_rate": 7.5137595526072e-05,
+ "loss": 0.4394,
+ "step": 81450
+ },
+ {
+ "epoch": 0.42049107165890176,
+ "grad_norm": 21003.841796875,
+ "learning_rate": 7.51060110375224e-05,
+ "loss": 0.4402,
+ "step": 81500
+ },
+ {
+ "epoch": 0.4207490416415146,
+ "grad_norm": 22772.330078125,
+ "learning_rate": 7.507441314787025e-05,
+ "loss": 0.4438,
+ "step": 81550
+ },
+ {
+ "epoch": 0.4210070116241274,
+ "grad_norm": 19593.216796875,
+ "learning_rate": 7.504280187398189e-05,
+ "loss": 0.4375,
+ "step": 81600
+ },
+ {
+ "epoch": 0.42126498160674025,
+ "grad_norm": 20914.66796875,
+ "learning_rate": 7.501117723273084e-05,
+ "loss": 0.4397,
+ "step": 81650
+ },
+ {
+ "epoch": 0.42152295158935305,
+ "grad_norm": 20479.12109375,
+ "learning_rate": 7.497953924099768e-05,
+ "loss": 0.4365,
+ "step": 81700
+ },
+ {
+ "epoch": 0.4217809215719659,
+ "grad_norm": 20309.25,
+ "learning_rate": 7.494788791567017e-05,
+ "loss": 0.4461,
+ "step": 81750
+ },
+ {
+ "epoch": 0.4220388915545787,
+ "grad_norm": 21467.72265625,
+ "learning_rate": 7.491622327364318e-05,
+ "loss": 0.4354,
+ "step": 81800
+ },
+ {
+ "epoch": 0.42229686153719154,
+ "grad_norm": 20826.80859375,
+ "learning_rate": 7.488454533181871e-05,
+ "loss": 0.4398,
+ "step": 81850
+ },
+ {
+ "epoch": 0.42255483151980433,
+ "grad_norm": 20537.826171875,
+ "learning_rate": 7.485285410710577e-05,
+ "loss": 0.4443,
+ "step": 81900
+ },
+ {
+ "epoch": 0.4228128015024172,
+ "grad_norm": 19521.810546875,
+ "learning_rate": 7.482114961642057e-05,
+ "loss": 0.4379,
+ "step": 81950
+ },
+ {
+ "epoch": 0.42307077148503003,
+ "grad_norm": 19407.5234375,
+ "learning_rate": 7.478943187668633e-05,
+ "loss": 0.4429,
+ "step": 82000
+ },
+ {
+ "epoch": 0.4233287414676428,
+ "grad_norm": 23058.337890625,
+ "learning_rate": 7.475770090483338e-05,
+ "loss": 0.4362,
+ "step": 82050
+ },
+ {
+ "epoch": 0.42358671145025567,
+ "grad_norm": 27362.29296875,
+ "learning_rate": 7.472595671779907e-05,
+ "loss": 0.4413,
+ "step": 82100
+ },
+ {
+ "epoch": 0.42384468143286846,
+ "grad_norm": 20389.08203125,
+ "learning_rate": 7.469419933252789e-05,
+ "loss": 0.4386,
+ "step": 82150
+ },
+ {
+ "epoch": 0.4241026514154813,
+ "grad_norm": 21554.896484375,
+ "learning_rate": 7.466242876597125e-05,
+ "loss": 0.4387,
+ "step": 82200
+ },
+ {
+ "epoch": 0.4243606213980941,
+ "grad_norm": 23449.822265625,
+ "learning_rate": 7.463064503508772e-05,
+ "loss": 0.4402,
+ "step": 82250
+ },
+ {
+ "epoch": 0.42461859138070696,
+ "grad_norm": 23945.1328125,
+ "learning_rate": 7.459884815684279e-05,
+ "loss": 0.4393,
+ "step": 82300
+ },
+ {
+ "epoch": 0.42487656136331975,
+ "grad_norm": 21705.064453125,
+ "learning_rate": 7.456703814820904e-05,
+ "loss": 0.4374,
+ "step": 82350
+ },
+ {
+ "epoch": 0.4251345313459326,
+ "grad_norm": 20050.66796875,
+ "learning_rate": 7.453521502616607e-05,
+ "loss": 0.4433,
+ "step": 82400
+ },
+ {
+ "epoch": 0.4253925013285454,
+ "grad_norm": 24757.845703125,
+ "learning_rate": 7.45033788077004e-05,
+ "loss": 0.4362,
+ "step": 82450
+ },
+ {
+ "epoch": 0.42565047131115824,
+ "grad_norm": 21754.42578125,
+ "learning_rate": 7.44715295098056e-05,
+ "loss": 0.4386,
+ "step": 82500
+ },
+ {
+ "epoch": 0.42590844129377103,
+ "grad_norm": 22891.12890625,
+ "learning_rate": 7.443966714948222e-05,
+ "loss": 0.4438,
+ "step": 82550
+ },
+ {
+ "epoch": 0.4261664112763839,
+ "grad_norm": 22174.580078125,
+ "learning_rate": 7.440779174373776e-05,
+ "loss": 0.4388,
+ "step": 82600
+ },
+ {
+ "epoch": 0.4264243812589967,
+ "grad_norm": 20407.677734375,
+ "learning_rate": 7.43759033095867e-05,
+ "loss": 0.4412,
+ "step": 82650
+ },
+ {
+ "epoch": 0.4266823512416095,
+ "grad_norm": 21960.552734375,
+ "learning_rate": 7.434400186405045e-05,
+ "loss": 0.4394,
+ "step": 82700
+ },
+ {
+ "epoch": 0.4269403212242224,
+ "grad_norm": 20736.583984375,
+ "learning_rate": 7.431208742415741e-05,
+ "loss": 0.4382,
+ "step": 82750
+ },
+ {
+ "epoch": 0.42719829120683517,
+ "grad_norm": 21133.63671875,
+ "learning_rate": 7.428016000694286e-05,
+ "loss": 0.4379,
+ "step": 82800
+ },
+ {
+ "epoch": 0.427456261189448,
+ "grad_norm": 23741.525390625,
+ "learning_rate": 7.424821962944908e-05,
+ "loss": 0.4398,
+ "step": 82850
+ },
+ {
+ "epoch": 0.4277142311720608,
+ "grad_norm": 21936.802734375,
+ "learning_rate": 7.42162663087252e-05,
+ "loss": 0.4383,
+ "step": 82900
+ },
+ {
+ "epoch": 0.42797220115467366,
+ "grad_norm": 24459.85546875,
+ "learning_rate": 7.418430006182727e-05,
+ "loss": 0.4393,
+ "step": 82950
+ },
+ {
+ "epoch": 0.42823017113728645,
+ "grad_norm": 21729.9921875,
+ "learning_rate": 7.415232090581828e-05,
+ "loss": 0.4421,
+ "step": 83000
+ },
+ {
+ "epoch": 0.4284881411198993,
+ "grad_norm": 21081.5703125,
+ "learning_rate": 7.412032885776807e-05,
+ "loss": 0.4414,
+ "step": 83050
+ },
+ {
+ "epoch": 0.4287461111025121,
+ "grad_norm": 20296.740234375,
+ "learning_rate": 7.408832393475338e-05,
+ "loss": 0.4316,
+ "step": 83100
+ },
+ {
+ "epoch": 0.42900408108512494,
+ "grad_norm": 20874.30078125,
+ "learning_rate": 7.405630615385781e-05,
+ "loss": 0.433,
+ "step": 83150
+ },
+ {
+ "epoch": 0.42926205106773774,
+ "grad_norm": 20673.11328125,
+ "learning_rate": 7.402427553217183e-05,
+ "loss": 0.4386,
+ "step": 83200
+ },
+ {
+ "epoch": 0.4295200210503506,
+ "grad_norm": 22462.07421875,
+ "learning_rate": 7.39922320867928e-05,
+ "loss": 0.4464,
+ "step": 83250
+ },
+ {
+ "epoch": 0.4297779910329634,
+ "grad_norm": 20411.771484375,
+ "learning_rate": 7.396017583482487e-05,
+ "loss": 0.444,
+ "step": 83300
+ },
+ {
+ "epoch": 0.43003596101557623,
+ "grad_norm": 21137.6953125,
+ "learning_rate": 7.392810679337902e-05,
+ "loss": 0.4416,
+ "step": 83350
+ },
+ {
+ "epoch": 0.4302939309981891,
+ "grad_norm": 23059.064453125,
+ "learning_rate": 7.38960249795731e-05,
+ "loss": 0.4401,
+ "step": 83400
+ },
+ {
+ "epoch": 0.43055190098080187,
+ "grad_norm": 20305.22265625,
+ "learning_rate": 7.386393041053176e-05,
+ "loss": 0.4399,
+ "step": 83450
+ },
+ {
+ "epoch": 0.4308098709634147,
+ "grad_norm": 22247.779296875,
+ "learning_rate": 7.38318231033865e-05,
+ "loss": 0.4362,
+ "step": 83500
+ },
+ {
+ "epoch": 0.4310678409460275,
+ "grad_norm": 22231.337890625,
+ "learning_rate": 7.379970307527552e-05,
+ "loss": 0.4417,
+ "step": 83550
+ },
+ {
+ "epoch": 0.43132581092864036,
+ "grad_norm": 21788.875,
+ "learning_rate": 7.376757034334388e-05,
+ "loss": 0.4374,
+ "step": 83600
+ },
+ {
+ "epoch": 0.43158378091125316,
+ "grad_norm": 22237.51953125,
+ "learning_rate": 7.373542492474343e-05,
+ "loss": 0.4372,
+ "step": 83650
+ },
+ {
+ "epoch": 0.431841750893866,
+ "grad_norm": 21732.943359375,
+ "learning_rate": 7.370326683663278e-05,
+ "loss": 0.4395,
+ "step": 83700
+ },
+ {
+ "epoch": 0.4320997208764788,
+ "grad_norm": 19517.212890625,
+ "learning_rate": 7.367109609617729e-05,
+ "loss": 0.4371,
+ "step": 83750
+ },
+ {
+ "epoch": 0.43235769085909165,
+ "grad_norm": 23681.388671875,
+ "learning_rate": 7.363891272054903e-05,
+ "loss": 0.4383,
+ "step": 83800
+ },
+ {
+ "epoch": 0.43261566084170444,
+ "grad_norm": 23889.822265625,
+ "learning_rate": 7.360671672692691e-05,
+ "loss": 0.441,
+ "step": 83850
+ },
+ {
+ "epoch": 0.4328736308243173,
+ "grad_norm": 21159.45703125,
+ "learning_rate": 7.357450813249654e-05,
+ "loss": 0.4328,
+ "step": 83900
+ },
+ {
+ "epoch": 0.4331316008069301,
+ "grad_norm": 20617.83984375,
+ "learning_rate": 7.354228695445023e-05,
+ "loss": 0.4395,
+ "step": 83950
+ },
+ {
+ "epoch": 0.43338957078954293,
+ "grad_norm": 19741.568359375,
+ "learning_rate": 7.351005320998699e-05,
+ "loss": 0.4356,
+ "step": 84000
+ },
+ {
+ "epoch": 0.4336475407721557,
+ "grad_norm": 21407.771484375,
+ "learning_rate": 7.347780691631259e-05,
+ "loss": 0.4322,
+ "step": 84050
+ },
+ {
+ "epoch": 0.4339055107547686,
+ "grad_norm": 22396.5625,
+ "learning_rate": 7.344554809063947e-05,
+ "loss": 0.4379,
+ "step": 84100
+ },
+ {
+ "epoch": 0.4341634807373814,
+ "grad_norm": 23536.361328125,
+ "learning_rate": 7.34132767501868e-05,
+ "loss": 0.4372,
+ "step": 84150
+ },
+ {
+ "epoch": 0.4344214507199942,
+ "grad_norm": 23622.90234375,
+ "learning_rate": 7.338099291218036e-05,
+ "loss": 0.4361,
+ "step": 84200
+ },
+ {
+ "epoch": 0.43467942070260707,
+ "grad_norm": 24463.931640625,
+ "learning_rate": 7.334869659385264e-05,
+ "loss": 0.4478,
+ "step": 84250
+ },
+ {
+ "epoch": 0.43493739068521986,
+ "grad_norm": 21666.328125,
+ "learning_rate": 7.331638781244283e-05,
+ "loss": 0.4387,
+ "step": 84300
+ },
+ {
+ "epoch": 0.4351953606678327,
+ "grad_norm": 21145.6875,
+ "learning_rate": 7.328406658519669e-05,
+ "loss": 0.4362,
+ "step": 84350
+ },
+ {
+ "epoch": 0.4354533306504455,
+ "grad_norm": 21766.228515625,
+ "learning_rate": 7.325173292936667e-05,
+ "loss": 0.4433,
+ "step": 84400
+ },
+ {
+ "epoch": 0.43571130063305835,
+ "grad_norm": 23118.056640625,
+ "learning_rate": 7.321938686221185e-05,
+ "loss": 0.4317,
+ "step": 84450
+ },
+ {
+ "epoch": 0.43596927061567115,
+ "grad_norm": 20925.833984375,
+ "learning_rate": 7.318702840099793e-05,
+ "loss": 0.4348,
+ "step": 84500
+ },
+ {
+ "epoch": 0.436227240598284,
+ "grad_norm": 21725.630859375,
+ "learning_rate": 7.315465756299727e-05,
+ "loss": 0.4363,
+ "step": 84550
+ },
+ {
+ "epoch": 0.4364852105808968,
+ "grad_norm": 20223.537109375,
+ "learning_rate": 7.312227436548875e-05,
+ "loss": 0.4363,
+ "step": 84600
+ },
+ {
+ "epoch": 0.43674318056350964,
+ "grad_norm": 22766.71484375,
+ "learning_rate": 7.308987882575793e-05,
+ "loss": 0.442,
+ "step": 84650
+ },
+ {
+ "epoch": 0.43700115054612243,
+ "grad_norm": 20453.341796875,
+ "learning_rate": 7.305747096109688e-05,
+ "loss": 0.4362,
+ "step": 84700
+ },
+ {
+ "epoch": 0.4372591205287353,
+ "grad_norm": 20761.466796875,
+ "learning_rate": 7.302505078880431e-05,
+ "loss": 0.435,
+ "step": 84750
+ },
+ {
+ "epoch": 0.4375170905113481,
+ "grad_norm": 20815.27734375,
+ "learning_rate": 7.299261832618551e-05,
+ "loss": 0.4398,
+ "step": 84800
+ },
+ {
+ "epoch": 0.4377750604939609,
+ "grad_norm": 22528.06640625,
+ "learning_rate": 7.296017359055224e-05,
+ "loss": 0.44,
+ "step": 84850
+ },
+ {
+ "epoch": 0.43803303047657377,
+ "grad_norm": 21391.71484375,
+ "learning_rate": 7.292771659922293e-05,
+ "loss": 0.4376,
+ "step": 84900
+ },
+ {
+ "epoch": 0.43829100045918656,
+ "grad_norm": 21485.966796875,
+ "learning_rate": 7.289524736952245e-05,
+ "loss": 0.4424,
+ "step": 84950
+ },
+ {
+ "epoch": 0.4385489704417994,
+ "grad_norm": 21160.314453125,
+ "learning_rate": 7.286276591878228e-05,
+ "loss": 0.4473,
+ "step": 85000
+ },
+ {
+ "epoch": 0.4385489704417994,
+ "eval_loss": 0.4252757728099823,
+ "eval_runtime": 3252.991,
+ "eval_samples_per_second": 953.313,
+ "eval_steps_per_second": 1.862,
+ "step": 85000
+ },
+ {
+ "epoch": 0.4388069404244122,
+ "grad_norm": 29667.109375,
+ "learning_rate": 7.283027226434036e-05,
+ "loss": 0.4414,
+ "step": 85050
+ },
+ {
+ "epoch": 0.43906491040702506,
+ "grad_norm": 24990.86328125,
+ "learning_rate": 7.27977664235412e-05,
+ "loss": 0.4321,
+ "step": 85100
+ },
+ {
+ "epoch": 0.43932288038963785,
+ "grad_norm": 21708.86328125,
+ "learning_rate": 7.276524841373576e-05,
+ "loss": 0.4331,
+ "step": 85150
+ },
+ {
+ "epoch": 0.4395808503722507,
+ "grad_norm": 22323.1015625,
+ "learning_rate": 7.273271825228157e-05,
+ "loss": 0.4372,
+ "step": 85200
+ },
+ {
+ "epoch": 0.4398388203548635,
+ "grad_norm": 21696.2734375,
+ "learning_rate": 7.270017595654255e-05,
+ "loss": 0.4271,
+ "step": 85250
+ },
+ {
+ "epoch": 0.44009679033747634,
+ "grad_norm": 23364.560546875,
+ "learning_rate": 7.266762154388917e-05,
+ "loss": 0.4327,
+ "step": 85300
+ },
+ {
+ "epoch": 0.44035476032008913,
+ "grad_norm": 21834.607421875,
+ "learning_rate": 7.263505503169834e-05,
+ "loss": 0.4337,
+ "step": 85350
+ },
+ {
+ "epoch": 0.440612730302702,
+ "grad_norm": 18636.244140625,
+ "learning_rate": 7.260247643735343e-05,
+ "loss": 0.4393,
+ "step": 85400
+ },
+ {
+ "epoch": 0.4408707002853148,
+ "grad_norm": 20385.875,
+ "learning_rate": 7.256988577824427e-05,
+ "loss": 0.4398,
+ "step": 85450
+ },
+ {
+ "epoch": 0.4411286702679276,
+ "grad_norm": 21459.576171875,
+ "learning_rate": 7.253728307176713e-05,
+ "loss": 0.435,
+ "step": 85500
+ },
+ {
+ "epoch": 0.4413866402505405,
+ "grad_norm": 22838.716796875,
+ "learning_rate": 7.25046683353247e-05,
+ "loss": 0.4368,
+ "step": 85550
+ },
+ {
+ "epoch": 0.44164461023315327,
+ "grad_norm": 23016.4140625,
+ "learning_rate": 7.247204158632608e-05,
+ "loss": 0.4353,
+ "step": 85600
+ },
+ {
+ "epoch": 0.4419025802157661,
+ "grad_norm": 22318.193359375,
+ "learning_rate": 7.243940284218682e-05,
+ "loss": 0.4374,
+ "step": 85650
+ },
+ {
+ "epoch": 0.4421605501983789,
+ "grad_norm": 20475.376953125,
+ "learning_rate": 7.240675212032884e-05,
+ "loss": 0.4339,
+ "step": 85700
+ },
+ {
+ "epoch": 0.44241852018099176,
+ "grad_norm": 22276.287109375,
+ "learning_rate": 7.237408943818042e-05,
+ "loss": 0.4275,
+ "step": 85750
+ },
+ {
+ "epoch": 0.44267649016360455,
+ "grad_norm": 22131.654296875,
+ "learning_rate": 7.234141481317634e-05,
+ "loss": 0.4373,
+ "step": 85800
+ },
+ {
+ "epoch": 0.4429344601462174,
+ "grad_norm": 24779.14453125,
+ "learning_rate": 7.230872826275765e-05,
+ "loss": 0.4347,
+ "step": 85850
+ },
+ {
+ "epoch": 0.4431924301288302,
+ "grad_norm": 22474.443359375,
+ "learning_rate": 7.227602980437179e-05,
+ "loss": 0.4341,
+ "step": 85900
+ },
+ {
+ "epoch": 0.44345040011144304,
+ "grad_norm": 21620.056640625,
+ "learning_rate": 7.224331945547258e-05,
+ "loss": 0.4399,
+ "step": 85950
+ },
+ {
+ "epoch": 0.44370837009405584,
+ "grad_norm": 21546.8046875,
+ "learning_rate": 7.221059723352014e-05,
+ "loss": 0.4437,
+ "step": 86000
+ },
+ {
+ "epoch": 0.4439663400766687,
+ "grad_norm": 22283.0078125,
+ "learning_rate": 7.2177863155981e-05,
+ "loss": 0.4403,
+ "step": 86050
+ },
+ {
+ "epoch": 0.4442243100592815,
+ "grad_norm": 21332.576171875,
+ "learning_rate": 7.214511724032795e-05,
+ "loss": 0.4369,
+ "step": 86100
+ },
+ {
+ "epoch": 0.44448228004189433,
+ "grad_norm": 23106.01953125,
+ "learning_rate": 7.211235950404013e-05,
+ "loss": 0.4369,
+ "step": 86150
+ },
+ {
+ "epoch": 0.4447402500245071,
+ "grad_norm": 21826.2734375,
+ "learning_rate": 7.207958996460298e-05,
+ "loss": 0.4407,
+ "step": 86200
+ },
+ {
+ "epoch": 0.44499822000711997,
+ "grad_norm": 22308.90625,
+ "learning_rate": 7.204680863950825e-05,
+ "loss": 0.4349,
+ "step": 86250
+ },
+ {
+ "epoch": 0.4452561899897328,
+ "grad_norm": 24916.359375,
+ "learning_rate": 7.2014015546254e-05,
+ "loss": 0.436,
+ "step": 86300
+ },
+ {
+ "epoch": 0.4455141599723456,
+ "grad_norm": 22585.77734375,
+ "learning_rate": 7.198121070234453e-05,
+ "loss": 0.4311,
+ "step": 86350
+ },
+ {
+ "epoch": 0.44577212995495846,
+ "grad_norm": 22984.658203125,
+ "learning_rate": 7.194839412529042e-05,
+ "loss": 0.4324,
+ "step": 86400
+ },
+ {
+ "epoch": 0.44603009993757126,
+ "grad_norm": 22495.552734375,
+ "learning_rate": 7.191556583260853e-05,
+ "loss": 0.4306,
+ "step": 86450
+ },
+ {
+ "epoch": 0.4462880699201841,
+ "grad_norm": 21413.2578125,
+ "learning_rate": 7.188272584182196e-05,
+ "loss": 0.4404,
+ "step": 86500
+ },
+ {
+ "epoch": 0.4465460399027969,
+ "grad_norm": 23719.43359375,
+ "learning_rate": 7.184987417046007e-05,
+ "loss": 0.4321,
+ "step": 86550
+ },
+ {
+ "epoch": 0.44680400988540975,
+ "grad_norm": 22586.095703125,
+ "learning_rate": 7.181701083605846e-05,
+ "loss": 0.4349,
+ "step": 86600
+ },
+ {
+ "epoch": 0.44706197986802254,
+ "grad_norm": 20580.166015625,
+ "learning_rate": 7.178413585615891e-05,
+ "loss": 0.4323,
+ "step": 86650
+ },
+ {
+ "epoch": 0.4473199498506354,
+ "grad_norm": 21345.71875,
+ "learning_rate": 7.175124924830948e-05,
+ "loss": 0.4326,
+ "step": 86700
+ },
+ {
+ "epoch": 0.4475779198332482,
+ "grad_norm": 20615.333984375,
+ "learning_rate": 7.171835103006438e-05,
+ "loss": 0.4425,
+ "step": 86750
+ },
+ {
+ "epoch": 0.44783588981586103,
+ "grad_norm": 25518.546875,
+ "learning_rate": 7.168544121898407e-05,
+ "loss": 0.4307,
+ "step": 86800
+ },
+ {
+ "epoch": 0.4480938597984738,
+ "grad_norm": 23149.703125,
+ "learning_rate": 7.165251983263512e-05,
+ "loss": 0.4336,
+ "step": 86850
+ },
+ {
+ "epoch": 0.4483518297810867,
+ "grad_norm": 22026.19140625,
+ "learning_rate": 7.16195868885904e-05,
+ "loss": 0.4401,
+ "step": 86900
+ },
+ {
+ "epoch": 0.44860979976369947,
+ "grad_norm": 21140.90234375,
+ "learning_rate": 7.158664240442881e-05,
+ "loss": 0.436,
+ "step": 86950
+ },
+ {
+ "epoch": 0.4488677697463123,
+ "grad_norm": 25489.1796875,
+ "learning_rate": 7.155368639773552e-05,
+ "loss": 0.4379,
+ "step": 87000
+ },
+ {
+ "epoch": 0.44912573972892517,
+ "grad_norm": 21035.275390625,
+ "learning_rate": 7.152071888610176e-05,
+ "loss": 0.433,
+ "step": 87050
+ },
+ {
+ "epoch": 0.44938370971153796,
+ "grad_norm": 25905.03515625,
+ "learning_rate": 7.148773988712503e-05,
+ "loss": 0.4423,
+ "step": 87100
+ },
+ {
+ "epoch": 0.4496416796941508,
+ "grad_norm": 21237.857421875,
+ "learning_rate": 7.14547494184088e-05,
+ "loss": 0.4346,
+ "step": 87150
+ },
+ {
+ "epoch": 0.4498996496767636,
+ "grad_norm": 19255.748046875,
+ "learning_rate": 7.14217474975628e-05,
+ "loss": 0.4333,
+ "step": 87200
+ },
+ {
+ "epoch": 0.45015761965937645,
+ "grad_norm": 22115.05078125,
+ "learning_rate": 7.138873414220277e-05,
+ "loss": 0.4371,
+ "step": 87250
+ },
+ {
+ "epoch": 0.45041558964198924,
+ "grad_norm": 23271.462890625,
+ "learning_rate": 7.135570936995064e-05,
+ "loss": 0.4362,
+ "step": 87300
+ },
+ {
+ "epoch": 0.4506735596246021,
+ "grad_norm": 24245.02734375,
+ "learning_rate": 7.132267319843438e-05,
+ "loss": 0.4371,
+ "step": 87350
+ },
+ {
+ "epoch": 0.4509315296072149,
+ "grad_norm": 22234.224609375,
+ "learning_rate": 7.128962564528805e-05,
+ "loss": 0.4306,
+ "step": 87400
+ },
+ {
+ "epoch": 0.45118949958982774,
+ "grad_norm": 22704.115234375,
+ "learning_rate": 7.12565667281518e-05,
+ "loss": 0.4408,
+ "step": 87450
+ },
+ {
+ "epoch": 0.45144746957244053,
+ "grad_norm": 21906.650390625,
+ "learning_rate": 7.122349646467183e-05,
+ "loss": 0.4322,
+ "step": 87500
+ },
+ {
+ "epoch": 0.4517054395550534,
+ "grad_norm": 21960.501953125,
+ "learning_rate": 7.119041487250045e-05,
+ "loss": 0.4322,
+ "step": 87550
+ },
+ {
+ "epoch": 0.45196340953766617,
+ "grad_norm": 20264.14453125,
+ "learning_rate": 7.11573219692959e-05,
+ "loss": 0.4403,
+ "step": 87600
+ },
+ {
+ "epoch": 0.452221379520279,
+ "grad_norm": 20237.078125,
+ "learning_rate": 7.112421777272259e-05,
+ "loss": 0.4421,
+ "step": 87650
+ },
+ {
+ "epoch": 0.45247934950289187,
+ "grad_norm": 22111.3203125,
+ "learning_rate": 7.109110230045087e-05,
+ "loss": 0.4386,
+ "step": 87700
+ },
+ {
+ "epoch": 0.45273731948550466,
+ "grad_norm": 20690.015625,
+ "learning_rate": 7.105797557015715e-05,
+ "loss": 0.4315,
+ "step": 87750
+ },
+ {
+ "epoch": 0.4529952894681175,
+ "grad_norm": 23273.888671875,
+ "learning_rate": 7.102483759952384e-05,
+ "loss": 0.4397,
+ "step": 87800
+ },
+ {
+ "epoch": 0.4532532594507303,
+ "grad_norm": 20268.541015625,
+ "learning_rate": 7.099168840623935e-05,
+ "loss": 0.4381,
+ "step": 87850
+ },
+ {
+ "epoch": 0.45351122943334315,
+ "grad_norm": 21591.724609375,
+ "learning_rate": 7.095852800799806e-05,
+ "loss": 0.4368,
+ "step": 87900
+ },
+ {
+ "epoch": 0.45376919941595595,
+ "grad_norm": 20683.994140625,
+ "learning_rate": 7.092535642250035e-05,
+ "loss": 0.4315,
+ "step": 87950
+ },
+ {
+ "epoch": 0.4540271693985688,
+ "grad_norm": 22910.26953125,
+ "learning_rate": 7.089217366745258e-05,
+ "loss": 0.4415,
+ "step": 88000
+ },
+ {
+ "epoch": 0.4542851393811816,
+ "grad_norm": 22321.40234375,
+ "learning_rate": 7.085897976056706e-05,
+ "loss": 0.4386,
+ "step": 88050
+ },
+ {
+ "epoch": 0.45454310936379444,
+ "grad_norm": 20730.521484375,
+ "learning_rate": 7.082577471956206e-05,
+ "loss": 0.4335,
+ "step": 88100
+ },
+ {
+ "epoch": 0.45480107934640723,
+ "grad_norm": 23302.033203125,
+ "learning_rate": 7.079255856216177e-05,
+ "loss": 0.4366,
+ "step": 88150
+ },
+ {
+ "epoch": 0.4550590493290201,
+ "grad_norm": 21125.5625,
+ "learning_rate": 7.075933130609636e-05,
+ "loss": 0.4388,
+ "step": 88200
+ },
+ {
+ "epoch": 0.4553170193116329,
+ "grad_norm": 24245.548828125,
+ "learning_rate": 7.072609296910187e-05,
+ "loss": 0.4369,
+ "step": 88250
+ },
+ {
+ "epoch": 0.4555749892942457,
+ "grad_norm": 19609.1484375,
+ "learning_rate": 7.06928435689203e-05,
+ "loss": 0.4287,
+ "step": 88300
+ },
+ {
+ "epoch": 0.4558329592768585,
+ "grad_norm": 21653.08984375,
+ "learning_rate": 7.065958312329953e-05,
+ "loss": 0.4357,
+ "step": 88350
+ },
+ {
+ "epoch": 0.45609092925947137,
+ "grad_norm": 23725.236328125,
+ "learning_rate": 7.062631164999331e-05,
+ "loss": 0.4382,
+ "step": 88400
+ },
+ {
+ "epoch": 0.4563488992420842,
+ "grad_norm": 21436.92578125,
+ "learning_rate": 7.059302916676137e-05,
+ "loss": 0.4373,
+ "step": 88450
+ },
+ {
+ "epoch": 0.456606869224697,
+ "grad_norm": 20179.189453125,
+ "learning_rate": 7.05597356913692e-05,
+ "loss": 0.4304,
+ "step": 88500
+ },
+ {
+ "epoch": 0.45686483920730986,
+ "grad_norm": 22804.22265625,
+ "learning_rate": 7.052643124158824e-05,
+ "loss": 0.4343,
+ "step": 88550
+ },
+ {
+ "epoch": 0.45712280918992265,
+ "grad_norm": 21530.931640625,
+ "learning_rate": 7.049311583519574e-05,
+ "loss": 0.4364,
+ "step": 88600
+ },
+ {
+ "epoch": 0.4573807791725355,
+ "grad_norm": 21411.646484375,
+ "learning_rate": 7.045978948997486e-05,
+ "loss": 0.436,
+ "step": 88650
+ },
+ {
+ "epoch": 0.4576387491551483,
+ "grad_norm": 20853.962890625,
+ "learning_rate": 7.042645222371451e-05,
+ "loss": 0.436,
+ "step": 88700
+ },
+ {
+ "epoch": 0.45789671913776114,
+ "grad_norm": 20940.28125,
+ "learning_rate": 7.039310405420952e-05,
+ "loss": 0.4349,
+ "step": 88750
+ },
+ {
+ "epoch": 0.45815468912037394,
+ "grad_norm": 22368.05078125,
+ "learning_rate": 7.035974499926045e-05,
+ "loss": 0.4355,
+ "step": 88800
+ },
+ {
+ "epoch": 0.4584126591029868,
+ "grad_norm": 21155.3984375,
+ "learning_rate": 7.032637507667377e-05,
+ "loss": 0.4292,
+ "step": 88850
+ },
+ {
+ "epoch": 0.4586706290855996,
+ "grad_norm": 21627.353515625,
+ "learning_rate": 7.029299430426164e-05,
+ "loss": 0.4404,
+ "step": 88900
+ },
+ {
+ "epoch": 0.45892859906821243,
+ "grad_norm": 22008.23046875,
+ "learning_rate": 7.025960269984212e-05,
+ "loss": 0.431,
+ "step": 88950
+ },
+ {
+ "epoch": 0.4591865690508252,
+ "grad_norm": 21588.109375,
+ "learning_rate": 7.022620028123898e-05,
+ "loss": 0.4319,
+ "step": 89000
+ },
+ {
+ "epoch": 0.45944453903343807,
+ "grad_norm": 21680.646484375,
+ "learning_rate": 7.019278706628179e-05,
+ "loss": 0.4403,
+ "step": 89050
+ },
+ {
+ "epoch": 0.4597025090160509,
+ "grad_norm": 25427.423828125,
+ "learning_rate": 7.015936307280587e-05,
+ "loss": 0.435,
+ "step": 89100
+ },
+ {
+ "epoch": 0.4599604789986637,
+ "grad_norm": 22674.693359375,
+ "learning_rate": 7.01259283186523e-05,
+ "loss": 0.4377,
+ "step": 89150
+ },
+ {
+ "epoch": 0.46021844898127656,
+ "grad_norm": 24841.029296875,
+ "learning_rate": 7.009248282166793e-05,
+ "loss": 0.4387,
+ "step": 89200
+ },
+ {
+ "epoch": 0.46047641896388936,
+ "grad_norm": 21259.369140625,
+ "learning_rate": 7.005902659970528e-05,
+ "loss": 0.4355,
+ "step": 89250
+ },
+ {
+ "epoch": 0.4607343889465022,
+ "grad_norm": 19364.466796875,
+ "learning_rate": 7.002555967062265e-05,
+ "loss": 0.4353,
+ "step": 89300
+ },
+ {
+ "epoch": 0.460992358929115,
+ "grad_norm": 25116.47265625,
+ "learning_rate": 6.999208205228405e-05,
+ "loss": 0.4328,
+ "step": 89350
+ },
+ {
+ "epoch": 0.46125032891172785,
+ "grad_norm": 24426.4296875,
+ "learning_rate": 6.995859376255918e-05,
+ "loss": 0.4331,
+ "step": 89400
+ },
+ {
+ "epoch": 0.46150829889434064,
+ "grad_norm": 20802.759765625,
+ "learning_rate": 6.99250948193234e-05,
+ "loss": 0.4294,
+ "step": 89450
+ },
+ {
+ "epoch": 0.4617662688769535,
+ "grad_norm": 23164.2109375,
+ "learning_rate": 6.989158524045787e-05,
+ "loss": 0.4338,
+ "step": 89500
+ },
+ {
+ "epoch": 0.4620242388595663,
+ "grad_norm": 20543.28515625,
+ "learning_rate": 6.98580650438493e-05,
+ "loss": 0.4243,
+ "step": 89550
+ },
+ {
+ "epoch": 0.46228220884217913,
+ "grad_norm": 22468.732421875,
+ "learning_rate": 6.982453424739016e-05,
+ "loss": 0.4306,
+ "step": 89600
+ },
+ {
+ "epoch": 0.4625401788247919,
+ "grad_norm": 22903.12890625,
+ "learning_rate": 6.979099286897849e-05,
+ "loss": 0.4316,
+ "step": 89650
+ },
+ {
+ "epoch": 0.4627981488074048,
+ "grad_norm": 23074.068359375,
+ "learning_rate": 6.975744092651808e-05,
+ "loss": 0.4371,
+ "step": 89700
+ },
+ {
+ "epoch": 0.46305611879001757,
+ "grad_norm": 22003.00390625,
+ "learning_rate": 6.972387843791827e-05,
+ "loss": 0.4329,
+ "step": 89750
+ },
+ {
+ "epoch": 0.4633140887726304,
+ "grad_norm": 21524.93359375,
+ "learning_rate": 6.969030542109407e-05,
+ "loss": 0.4348,
+ "step": 89800
+ },
+ {
+ "epoch": 0.46357205875524327,
+ "grad_norm": 20501.130859375,
+ "learning_rate": 6.965672189396614e-05,
+ "loss": 0.4286,
+ "step": 89850
+ },
+ {
+ "epoch": 0.46383002873785606,
+ "grad_norm": 21559.396484375,
+ "learning_rate": 6.962312787446068e-05,
+ "loss": 0.434,
+ "step": 89900
+ },
+ {
+ "epoch": 0.4640879987204689,
+ "grad_norm": 21185.537109375,
+ "learning_rate": 6.958952338050955e-05,
+ "loss": 0.4326,
+ "step": 89950
+ },
+ {
+ "epoch": 0.4643459687030817,
+ "grad_norm": 23004.626953125,
+ "learning_rate": 6.955590843005016e-05,
+ "loss": 0.4272,
+ "step": 90000
+ },
+ {
+ "epoch": 0.4643459687030817,
+ "eval_loss": 0.4223860800266266,
+ "eval_runtime": 3251.8949,
+ "eval_samples_per_second": 953.635,
+ "eval_steps_per_second": 1.863,
+ "step": 90000
+ },
+ {
+ "epoch": 0.46460393868569455,
+ "grad_norm": 20333.259765625,
+ "learning_rate": 6.952228304102553e-05,
+ "loss": 0.4338,
+ "step": 90050
+ },
+ {
+ "epoch": 0.46486190866830734,
+ "grad_norm": 25967.029296875,
+ "learning_rate": 6.948864723138423e-05,
+ "loss": 0.4352,
+ "step": 90100
+ },
+ {
+ "epoch": 0.4651198786509202,
+ "grad_norm": 22849.9375,
+ "learning_rate": 6.945500101908043e-05,
+ "loss": 0.4358,
+ "step": 90150
+ },
+ {
+ "epoch": 0.465377848633533,
+ "grad_norm": 20628.9453125,
+ "learning_rate": 6.94213444220738e-05,
+ "loss": 0.4343,
+ "step": 90200
+ },
+ {
+ "epoch": 0.46563581861614584,
+ "grad_norm": 22179.84375,
+ "learning_rate": 6.938767745832959e-05,
+ "loss": 0.4314,
+ "step": 90250
+ },
+ {
+ "epoch": 0.46589378859875863,
+ "grad_norm": 24433.46484375,
+ "learning_rate": 6.935400014581858e-05,
+ "loss": 0.436,
+ "step": 90300
+ },
+ {
+ "epoch": 0.4661517585813715,
+ "grad_norm": 21914.666015625,
+ "learning_rate": 6.932031250251705e-05,
+ "loss": 0.431,
+ "step": 90350
+ },
+ {
+ "epoch": 0.46640972856398427,
+ "grad_norm": 19517.78125,
+ "learning_rate": 6.928661454640683e-05,
+ "loss": 0.4282,
+ "step": 90400
+ },
+ {
+ "epoch": 0.4666676985465971,
+ "grad_norm": 25924.5234375,
+ "learning_rate": 6.925290629547522e-05,
+ "loss": 0.4344,
+ "step": 90450
+ },
+ {
+ "epoch": 0.4669256685292099,
+ "grad_norm": 20866.927734375,
+ "learning_rate": 6.921918776771505e-05,
+ "loss": 0.4336,
+ "step": 90500
+ },
+ {
+ "epoch": 0.46718363851182276,
+ "grad_norm": 22734.5625,
+ "learning_rate": 6.91854589811246e-05,
+ "loss": 0.4375,
+ "step": 90550
+ },
+ {
+ "epoch": 0.4674416084944356,
+ "grad_norm": 21173.5703125,
+ "learning_rate": 6.915171995370766e-05,
+ "loss": 0.428,
+ "step": 90600
+ },
+ {
+ "epoch": 0.4676995784770484,
+ "grad_norm": 23864.681640625,
+ "learning_rate": 6.911797070347346e-05,
+ "loss": 0.4344,
+ "step": 90650
+ },
+ {
+ "epoch": 0.46795754845966125,
+ "grad_norm": 26236.091796875,
+ "learning_rate": 6.908421124843669e-05,
+ "loss": 0.4345,
+ "step": 90700
+ },
+ {
+ "epoch": 0.46821551844227405,
+ "grad_norm": 20788.6015625,
+ "learning_rate": 6.905044160661748e-05,
+ "loss": 0.4332,
+ "step": 90750
+ },
+ {
+ "epoch": 0.4684734884248869,
+ "grad_norm": 21382.2578125,
+ "learning_rate": 6.901666179604148e-05,
+ "loss": 0.4356,
+ "step": 90800
+ },
+ {
+ "epoch": 0.4687314584074997,
+ "grad_norm": 20230.220703125,
+ "learning_rate": 6.898287183473961e-05,
+ "loss": 0.4262,
+ "step": 90850
+ },
+ {
+ "epoch": 0.46898942839011254,
+ "grad_norm": 31838.697265625,
+ "learning_rate": 6.894907174074836e-05,
+ "loss": 0.4316,
+ "step": 90900
+ },
+ {
+ "epoch": 0.46924739837272533,
+ "grad_norm": 21029.5234375,
+ "learning_rate": 6.891526153210953e-05,
+ "loss": 0.4346,
+ "step": 90950
+ },
+ {
+ "epoch": 0.4695053683553382,
+ "grad_norm": 23617.826171875,
+ "learning_rate": 6.888144122687035e-05,
+ "loss": 0.4262,
+ "step": 91000
+ },
+ {
+ "epoch": 0.469763338337951,
+ "grad_norm": 23151.751953125,
+ "learning_rate": 6.884761084308349e-05,
+ "loss": 0.4296,
+ "step": 91050
+ },
+ {
+ "epoch": 0.4700213083205638,
+ "grad_norm": 19649.466796875,
+ "learning_rate": 6.881377039880692e-05,
+ "loss": 0.4325,
+ "step": 91100
+ },
+ {
+ "epoch": 0.4702792783031766,
+ "grad_norm": 20488.10546875,
+ "learning_rate": 6.8779919912104e-05,
+ "loss": 0.4352,
+ "step": 91150
+ },
+ {
+ "epoch": 0.47053724828578947,
+ "grad_norm": 21639.306640625,
+ "learning_rate": 6.874605940104349e-05,
+ "loss": 0.4319,
+ "step": 91200
+ },
+ {
+ "epoch": 0.4707952182684023,
+ "grad_norm": 21799.994140625,
+ "learning_rate": 6.871218888369947e-05,
+ "loss": 0.4315,
+ "step": 91250
+ },
+ {
+ "epoch": 0.4710531882510151,
+ "grad_norm": 22425.94140625,
+ "learning_rate": 6.867830837815137e-05,
+ "loss": 0.4381,
+ "step": 91300
+ },
+ {
+ "epoch": 0.47131115823362796,
+ "grad_norm": 22582.57421875,
+ "learning_rate": 6.864441790248396e-05,
+ "loss": 0.4297,
+ "step": 91350
+ },
+ {
+ "epoch": 0.47156912821624075,
+ "grad_norm": 21082.38671875,
+ "learning_rate": 6.861051747478726e-05,
+ "loss": 0.4292,
+ "step": 91400
+ },
+ {
+ "epoch": 0.4718270981988536,
+ "grad_norm": 23156.5546875,
+ "learning_rate": 6.857660711315672e-05,
+ "loss": 0.4276,
+ "step": 91450
+ },
+ {
+ "epoch": 0.4720850681814664,
+ "grad_norm": 21754.6796875,
+ "learning_rate": 6.854268683569302e-05,
+ "loss": 0.4369,
+ "step": 91500
+ },
+ {
+ "epoch": 0.47234303816407924,
+ "grad_norm": 22397.896484375,
+ "learning_rate": 6.850875666050216e-05,
+ "loss": 0.4312,
+ "step": 91550
+ },
+ {
+ "epoch": 0.47260100814669204,
+ "grad_norm": 21344.166015625,
+ "learning_rate": 6.847481660569537e-05,
+ "loss": 0.4291,
+ "step": 91600
+ },
+ {
+ "epoch": 0.4728589781293049,
+ "grad_norm": 23818.71484375,
+ "learning_rate": 6.844086668938923e-05,
+ "loss": 0.4352,
+ "step": 91650
+ },
+ {
+ "epoch": 0.4731169481119177,
+ "grad_norm": 21734.537109375,
+ "learning_rate": 6.840690692970554e-05,
+ "loss": 0.4326,
+ "step": 91700
+ },
+ {
+ "epoch": 0.47337491809453053,
+ "grad_norm": 22027.734375,
+ "learning_rate": 6.837293734477136e-05,
+ "loss": 0.4369,
+ "step": 91750
+ },
+ {
+ "epoch": 0.4736328880771433,
+ "grad_norm": 23111.103515625,
+ "learning_rate": 6.8338957952719e-05,
+ "loss": 0.4396,
+ "step": 91800
+ },
+ {
+ "epoch": 0.47389085805975617,
+ "grad_norm": 22521.767578125,
+ "learning_rate": 6.830496877168599e-05,
+ "loss": 0.4376,
+ "step": 91850
+ },
+ {
+ "epoch": 0.47414882804236896,
+ "grad_norm": 19730.158203125,
+ "learning_rate": 6.827096981981511e-05,
+ "loss": 0.4321,
+ "step": 91900
+ },
+ {
+ "epoch": 0.4744067980249818,
+ "grad_norm": 21871.134765625,
+ "learning_rate": 6.823696111525433e-05,
+ "loss": 0.4373,
+ "step": 91950
+ },
+ {
+ "epoch": 0.47466476800759466,
+ "grad_norm": 22332.384765625,
+ "learning_rate": 6.820294267615686e-05,
+ "loss": 0.4323,
+ "step": 92000
+ },
+ {
+ "epoch": 0.47492273799020746,
+ "grad_norm": 22426.59765625,
+ "learning_rate": 6.816891452068104e-05,
+ "loss": 0.4272,
+ "step": 92050
+ },
+ {
+ "epoch": 0.4751807079728203,
+ "grad_norm": 23286.05859375,
+ "learning_rate": 6.81348766669905e-05,
+ "loss": 0.4442,
+ "step": 92100
+ },
+ {
+ "epoch": 0.4754386779554331,
+ "grad_norm": 21696.1171875,
+ "learning_rate": 6.810082913325395e-05,
+ "loss": 0.4288,
+ "step": 92150
+ },
+ {
+ "epoch": 0.47569664793804595,
+ "grad_norm": 20548.908203125,
+ "learning_rate": 6.80667719376453e-05,
+ "loss": 0.4358,
+ "step": 92200
+ },
+ {
+ "epoch": 0.47595461792065874,
+ "grad_norm": 22605.1640625,
+ "learning_rate": 6.803270509834363e-05,
+ "loss": 0.4327,
+ "step": 92250
+ },
+ {
+ "epoch": 0.4762125879032716,
+ "grad_norm": 23604.30078125,
+ "learning_rate": 6.799862863353318e-05,
+ "loss": 0.441,
+ "step": 92300
+ },
+ {
+ "epoch": 0.4764705578858844,
+ "grad_norm": 22117.1796875,
+ "learning_rate": 6.796454256140328e-05,
+ "loss": 0.4289,
+ "step": 92350
+ },
+ {
+ "epoch": 0.47672852786849723,
+ "grad_norm": 22476.54296875,
+ "learning_rate": 6.793044690014842e-05,
+ "loss": 0.4319,
+ "step": 92400
+ },
+ {
+ "epoch": 0.47698649785111,
+ "grad_norm": 20855.140625,
+ "learning_rate": 6.789634166796821e-05,
+ "loss": 0.4326,
+ "step": 92450
+ },
+ {
+ "epoch": 0.4772444678337229,
+ "grad_norm": 23704.125,
+ "learning_rate": 6.786222688306734e-05,
+ "loss": 0.4374,
+ "step": 92500
+ },
+ {
+ "epoch": 0.47750243781633567,
+ "grad_norm": 20677.91015625,
+ "learning_rate": 6.782810256365568e-05,
+ "loss": 0.4261,
+ "step": 92550
+ },
+ {
+ "epoch": 0.4777604077989485,
+ "grad_norm": 21245.837890625,
+ "learning_rate": 6.779396872794807e-05,
+ "loss": 0.4309,
+ "step": 92600
+ },
+ {
+ "epoch": 0.4780183777815613,
+ "grad_norm": 25415.859375,
+ "learning_rate": 6.775982539416453e-05,
+ "loss": 0.437,
+ "step": 92650
+ },
+ {
+ "epoch": 0.47827634776417416,
+ "grad_norm": 20582.556640625,
+ "learning_rate": 6.772567258053007e-05,
+ "loss": 0.4349,
+ "step": 92700
+ },
+ {
+ "epoch": 0.478534317746787,
+ "grad_norm": 20002.013671875,
+ "learning_rate": 6.769151030527483e-05,
+ "loss": 0.4263,
+ "step": 92750
+ },
+ {
+ "epoch": 0.4787922877293998,
+ "grad_norm": 23287.6875,
+ "learning_rate": 6.765733858663397e-05,
+ "loss": 0.4332,
+ "step": 92800
+ },
+ {
+ "epoch": 0.47905025771201265,
+ "grad_norm": 22023.66796875,
+ "learning_rate": 6.76231574428477e-05,
+ "loss": 0.4339,
+ "step": 92850
+ },
+ {
+ "epoch": 0.47930822769462544,
+ "grad_norm": 21299.185546875,
+ "learning_rate": 6.758896689216122e-05,
+ "loss": 0.4293,
+ "step": 92900
+ },
+ {
+ "epoch": 0.4795661976772383,
+ "grad_norm": 21979.560546875,
+ "learning_rate": 6.755476695282479e-05,
+ "loss": 0.4314,
+ "step": 92950
+ },
+ {
+ "epoch": 0.4798241676598511,
+ "grad_norm": 21399.029296875,
+ "learning_rate": 6.752055764309372e-05,
+ "loss": 0.4374,
+ "step": 93000
+ },
+ {
+ "epoch": 0.48008213764246394,
+ "grad_norm": 23827.685546875,
+ "learning_rate": 6.748633898122823e-05,
+ "loss": 0.4348,
+ "step": 93050
+ },
+ {
+ "epoch": 0.48034010762507673,
+ "grad_norm": 21079.61328125,
+ "learning_rate": 6.74521109854936e-05,
+ "loss": 0.4312,
+ "step": 93100
+ },
+ {
+ "epoch": 0.4805980776076896,
+ "grad_norm": 20395.04296875,
+ "learning_rate": 6.741787367416006e-05,
+ "loss": 0.4246,
+ "step": 93150
+ },
+ {
+ "epoch": 0.48085604759030237,
+ "grad_norm": 21922.576171875,
+ "learning_rate": 6.738362706550284e-05,
+ "loss": 0.4355,
+ "step": 93200
+ },
+ {
+ "epoch": 0.4811140175729152,
+ "grad_norm": 21317.001953125,
+ "learning_rate": 6.734937117780211e-05,
+ "loss": 0.4302,
+ "step": 93250
+ },
+ {
+ "epoch": 0.481371987555528,
+ "grad_norm": 21387.46484375,
+ "learning_rate": 6.731510602934298e-05,
+ "loss": 0.434,
+ "step": 93300
+ },
+ {
+ "epoch": 0.48162995753814086,
+ "grad_norm": 24289.28515625,
+ "learning_rate": 6.728083163841554e-05,
+ "loss": 0.4338,
+ "step": 93350
+ },
+ {
+ "epoch": 0.4818879275207537,
+ "grad_norm": 23514.162109375,
+ "learning_rate": 6.72465480233148e-05,
+ "loss": 0.4357,
+ "step": 93400
+ },
+ {
+ "epoch": 0.4821458975033665,
+ "grad_norm": 21481.0859375,
+ "learning_rate": 6.721225520234068e-05,
+ "loss": 0.4307,
+ "step": 93450
+ },
+ {
+ "epoch": 0.48240386748597935,
+ "grad_norm": 25044.396484375,
+ "learning_rate": 6.717795319379805e-05,
+ "loss": 0.4335,
+ "step": 93500
+ },
+ {
+ "epoch": 0.48266183746859215,
+ "grad_norm": 21193.333984375,
+ "learning_rate": 6.714364201599662e-05,
+ "loss": 0.4243,
+ "step": 93550
+ },
+ {
+ "epoch": 0.482919807451205,
+ "grad_norm": 19113.275390625,
+ "learning_rate": 6.710932168725105e-05,
+ "loss": 0.4331,
+ "step": 93600
+ },
+ {
+ "epoch": 0.4831777774338178,
+ "grad_norm": 21924.162109375,
+ "learning_rate": 6.707499222588087e-05,
+ "loss": 0.4309,
+ "step": 93650
+ },
+ {
+ "epoch": 0.48343574741643064,
+ "grad_norm": 21123.498046875,
+ "learning_rate": 6.704065365021048e-05,
+ "loss": 0.4392,
+ "step": 93700
+ },
+ {
+ "epoch": 0.48369371739904343,
+ "grad_norm": 22201.29296875,
+ "learning_rate": 6.700630597856914e-05,
+ "loss": 0.4281,
+ "step": 93750
+ },
+ {
+ "epoch": 0.4839516873816563,
+ "grad_norm": 24237.494140625,
+ "learning_rate": 6.697194922929096e-05,
+ "loss": 0.4367,
+ "step": 93800
+ },
+ {
+ "epoch": 0.4842096573642691,
+ "grad_norm": 21306.8125,
+ "learning_rate": 6.693758342071495e-05,
+ "loss": 0.4374,
+ "step": 93850
+ },
+ {
+ "epoch": 0.4844676273468819,
+ "grad_norm": 22120.75,
+ "learning_rate": 6.690320857118488e-05,
+ "loss": 0.4309,
+ "step": 93900
+ },
+ {
+ "epoch": 0.4847255973294947,
+ "grad_norm": 20799.59765625,
+ "learning_rate": 6.686882469904939e-05,
+ "loss": 0.4262,
+ "step": 93950
+ },
+ {
+ "epoch": 0.48498356731210757,
+ "grad_norm": 22964.642578125,
+ "learning_rate": 6.683443182266192e-05,
+ "loss": 0.4338,
+ "step": 94000
+ },
+ {
+ "epoch": 0.48524153729472036,
+ "grad_norm": 22017.076171875,
+ "learning_rate": 6.68000299603807e-05,
+ "loss": 0.4317,
+ "step": 94050
+ },
+ {
+ "epoch": 0.4854995072773332,
+ "grad_norm": 21423.890625,
+ "learning_rate": 6.676561913056884e-05,
+ "loss": 0.4329,
+ "step": 94100
+ },
+ {
+ "epoch": 0.48575747725994606,
+ "grad_norm": 22123.390625,
+ "learning_rate": 6.67311993515941e-05,
+ "loss": 0.4309,
+ "step": 94150
+ },
+ {
+ "epoch": 0.48601544724255885,
+ "grad_norm": 23107.208984375,
+ "learning_rate": 6.669677064182915e-05,
+ "loss": 0.4316,
+ "step": 94200
+ },
+ {
+ "epoch": 0.4862734172251717,
+ "grad_norm": 21250.33203125,
+ "learning_rate": 6.666233301965132e-05,
+ "loss": 0.4289,
+ "step": 94250
+ },
+ {
+ "epoch": 0.4865313872077845,
+ "grad_norm": 21629.720703125,
+ "learning_rate": 6.66278865034428e-05,
+ "loss": 0.4301,
+ "step": 94300
+ },
+ {
+ "epoch": 0.48678935719039734,
+ "grad_norm": 23665.4609375,
+ "learning_rate": 6.659343111159043e-05,
+ "loss": 0.4267,
+ "step": 94350
+ },
+ {
+ "epoch": 0.48704732717301014,
+ "grad_norm": 23254.232421875,
+ "learning_rate": 6.655896686248583e-05,
+ "loss": 0.4266,
+ "step": 94400
+ },
+ {
+ "epoch": 0.487305297155623,
+ "grad_norm": 22491.404296875,
+ "learning_rate": 6.652449377452539e-05,
+ "loss": 0.4278,
+ "step": 94450
+ },
+ {
+ "epoch": 0.4875632671382358,
+ "grad_norm": 21071.74609375,
+ "learning_rate": 6.649001186611015e-05,
+ "loss": 0.4308,
+ "step": 94500
+ },
+ {
+ "epoch": 0.4878212371208486,
+ "grad_norm": 20860.861328125,
+ "learning_rate": 6.64555211556459e-05,
+ "loss": 0.4308,
+ "step": 94550
+ },
+ {
+ "epoch": 0.4880792071034614,
+ "grad_norm": 21733.033203125,
+ "learning_rate": 6.642102166154308e-05,
+ "loss": 0.4376,
+ "step": 94600
+ },
+ {
+ "epoch": 0.48833717708607427,
+ "grad_norm": 22799.3984375,
+ "learning_rate": 6.638651340221687e-05,
+ "loss": 0.4289,
+ "step": 94650
+ },
+ {
+ "epoch": 0.48859514706868706,
+ "grad_norm": 21678.296875,
+ "learning_rate": 6.635199639608709e-05,
+ "loss": 0.4301,
+ "step": 94700
+ },
+ {
+ "epoch": 0.4888531170512999,
+ "grad_norm": 20510.052734375,
+ "learning_rate": 6.631747066157831e-05,
+ "loss": 0.4276,
+ "step": 94750
+ },
+ {
+ "epoch": 0.48911108703391276,
+ "grad_norm": 21075.474609375,
+ "learning_rate": 6.628293621711964e-05,
+ "loss": 0.435,
+ "step": 94800
+ },
+ {
+ "epoch": 0.48936905701652555,
+ "grad_norm": 22063.083984375,
+ "learning_rate": 6.624839308114492e-05,
+ "loss": 0.434,
+ "step": 94850
+ },
+ {
+ "epoch": 0.4896270269991384,
+ "grad_norm": 20185.99609375,
+ "learning_rate": 6.621384127209261e-05,
+ "loss": 0.4246,
+ "step": 94900
+ },
+ {
+ "epoch": 0.4898849969817512,
+ "grad_norm": 22002.326171875,
+ "learning_rate": 6.61792808084058e-05,
+ "loss": 0.4272,
+ "step": 94950
+ },
+ {
+ "epoch": 0.49014296696436405,
+ "grad_norm": 22271.25,
+ "learning_rate": 6.614471170853218e-05,
+ "loss": 0.4323,
+ "step": 95000
+ },
+ {
+ "epoch": 0.49014296696436405,
+ "eval_loss": 0.4187907576560974,
+ "eval_runtime": 3274.3922,
+ "eval_samples_per_second": 947.083,
+ "eval_steps_per_second": 1.85,
+ "step": 95000
+ },
+ {
+ "epoch": 0.49040093694697684,
+ "grad_norm": 20668.224609375,
+ "learning_rate": 6.611013399092406e-05,
+ "loss": 0.4285,
+ "step": 95050
+ },
+ {
+ "epoch": 0.4906589069295897,
+ "grad_norm": 20890.05078125,
+ "learning_rate": 6.607554767403838e-05,
+ "loss": 0.4333,
+ "step": 95100
+ },
+ {
+ "epoch": 0.4909168769122025,
+ "grad_norm": 22767.6875,
+ "learning_rate": 6.604095277633664e-05,
+ "loss": 0.4284,
+ "step": 95150
+ },
+ {
+ "epoch": 0.49117484689481533,
+ "grad_norm": 22603.083984375,
+ "learning_rate": 6.600634931628493e-05,
+ "loss": 0.4332,
+ "step": 95200
+ },
+ {
+ "epoch": 0.4914328168774281,
+ "grad_norm": 25005.8984375,
+ "learning_rate": 6.597173731235388e-05,
+ "loss": 0.4284,
+ "step": 95250
+ },
+ {
+ "epoch": 0.491690786860041,
+ "grad_norm": 23687.4765625,
+ "learning_rate": 6.593711678301874e-05,
+ "loss": 0.4316,
+ "step": 95300
+ },
+ {
+ "epoch": 0.49194875684265377,
+ "grad_norm": 19670.087890625,
+ "learning_rate": 6.590248774675926e-05,
+ "loss": 0.4326,
+ "step": 95350
+ },
+ {
+ "epoch": 0.4922067268252666,
+ "grad_norm": 23065.818359375,
+ "learning_rate": 6.586785022205977e-05,
+ "loss": 0.4316,
+ "step": 95400
+ },
+ {
+ "epoch": 0.4924646968078794,
+ "grad_norm": 21279.01953125,
+ "learning_rate": 6.583320422740909e-05,
+ "loss": 0.4278,
+ "step": 95450
+ },
+ {
+ "epoch": 0.49272266679049226,
+ "grad_norm": 19707.6328125,
+ "learning_rate": 6.579854978130057e-05,
+ "loss": 0.4272,
+ "step": 95500
+ },
+ {
+ "epoch": 0.4929806367731051,
+ "grad_norm": 22938.3515625,
+ "learning_rate": 6.57638869022321e-05,
+ "loss": 0.4316,
+ "step": 95550
+ },
+ {
+ "epoch": 0.4932386067557179,
+ "grad_norm": 24812.65625,
+ "learning_rate": 6.572921560870607e-05,
+ "loss": 0.4315,
+ "step": 95600
+ },
+ {
+ "epoch": 0.49349657673833075,
+ "grad_norm": 21462.873046875,
+ "learning_rate": 6.569453591922931e-05,
+ "loss": 0.4299,
+ "step": 95650
+ },
+ {
+ "epoch": 0.49375454672094354,
+ "grad_norm": 22590.384765625,
+ "learning_rate": 6.565984785231318e-05,
+ "loss": 0.4294,
+ "step": 95700
+ },
+ {
+ "epoch": 0.4940125167035564,
+ "grad_norm": 23677.619140625,
+ "learning_rate": 6.56251514264735e-05,
+ "loss": 0.4379,
+ "step": 95750
+ },
+ {
+ "epoch": 0.4942704866861692,
+ "grad_norm": 22078.87109375,
+ "learning_rate": 6.559044666023057e-05,
+ "loss": 0.4276,
+ "step": 95800
+ },
+ {
+ "epoch": 0.49452845666878203,
+ "grad_norm": 22440.369140625,
+ "learning_rate": 6.55557335721091e-05,
+ "loss": 0.4279,
+ "step": 95850
+ },
+ {
+ "epoch": 0.49478642665139483,
+ "grad_norm": 24544.12109375,
+ "learning_rate": 6.552101218063826e-05,
+ "loss": 0.4305,
+ "step": 95900
+ },
+ {
+ "epoch": 0.4950443966340077,
+ "grad_norm": 21647.107421875,
+ "learning_rate": 6.548628250435167e-05,
+ "loss": 0.4328,
+ "step": 95950
+ },
+ {
+ "epoch": 0.49530236661662047,
+ "grad_norm": 21392.28125,
+ "learning_rate": 6.545154456178735e-05,
+ "loss": 0.4299,
+ "step": 96000
+ },
+ {
+ "epoch": 0.4955603365992333,
+ "grad_norm": 19458.55078125,
+ "learning_rate": 6.541679837148775e-05,
+ "loss": 0.4375,
+ "step": 96050
+ },
+ {
+ "epoch": 0.4958183065818461,
+ "grad_norm": 21774.14453125,
+ "learning_rate": 6.53820439519997e-05,
+ "loss": 0.4348,
+ "step": 96100
+ },
+ {
+ "epoch": 0.49607627656445896,
+ "grad_norm": 22902.63671875,
+ "learning_rate": 6.534728132187444e-05,
+ "loss": 0.4297,
+ "step": 96150
+ },
+ {
+ "epoch": 0.49633424654707176,
+ "grad_norm": 20869.306640625,
+ "learning_rate": 6.531251049966762e-05,
+ "loss": 0.4313,
+ "step": 96200
+ },
+ {
+ "epoch": 0.4965922165296846,
+ "grad_norm": 23554.537109375,
+ "learning_rate": 6.527773150393919e-05,
+ "loss": 0.4313,
+ "step": 96250
+ },
+ {
+ "epoch": 0.49685018651229745,
+ "grad_norm": 23000.92578125,
+ "learning_rate": 6.524294435325351e-05,
+ "loss": 0.4266,
+ "step": 96300
+ },
+ {
+ "epoch": 0.49710815649491025,
+ "grad_norm": 21331.72265625,
+ "learning_rate": 6.52081490661793e-05,
+ "loss": 0.4261,
+ "step": 96350
+ },
+ {
+ "epoch": 0.4973661264775231,
+ "grad_norm": 22540.75,
+ "learning_rate": 6.517334566128961e-05,
+ "loss": 0.4282,
+ "step": 96400
+ },
+ {
+ "epoch": 0.4976240964601359,
+ "grad_norm": 21733.560546875,
+ "learning_rate": 6.51385341571618e-05,
+ "loss": 0.43,
+ "step": 96450
+ },
+ {
+ "epoch": 0.49788206644274874,
+ "grad_norm": 23288.21875,
+ "learning_rate": 6.510371457237765e-05,
+ "loss": 0.4306,
+ "step": 96500
+ },
+ {
+ "epoch": 0.49814003642536153,
+ "grad_norm": 24475.9453125,
+ "learning_rate": 6.506888692552309e-05,
+ "loss": 0.4299,
+ "step": 96550
+ },
+ {
+ "epoch": 0.4983980064079744,
+ "grad_norm": 20756.5078125,
+ "learning_rate": 6.503405123518847e-05,
+ "loss": 0.4292,
+ "step": 96600
+ },
+ {
+ "epoch": 0.4986559763905872,
+ "grad_norm": 21059.365234375,
+ "learning_rate": 6.499920751996845e-05,
+ "loss": 0.4261,
+ "step": 96650
+ },
+ {
+ "epoch": 0.4989139463732,
+ "grad_norm": 22173.65625,
+ "learning_rate": 6.496435579846188e-05,
+ "loss": 0.4309,
+ "step": 96700
+ },
+ {
+ "epoch": 0.4991719163558128,
+ "grad_norm": 23941.49609375,
+ "learning_rate": 6.492949608927196e-05,
+ "loss": 0.4355,
+ "step": 96750
+ },
+ {
+ "epoch": 0.49942988633842567,
+ "grad_norm": 22027.400390625,
+ "learning_rate": 6.489462841100611e-05,
+ "loss": 0.433,
+ "step": 96800
+ },
+ {
+ "epoch": 0.49968785632103846,
+ "grad_norm": 21414.77734375,
+ "learning_rate": 6.485975278227605e-05,
+ "loss": 0.4291,
+ "step": 96850
+ },
+ {
+ "epoch": 0.4999458263036513,
+ "grad_norm": 23023.60546875,
+ "learning_rate": 6.482486922169767e-05,
+ "loss": 0.4309,
+ "step": 96900
+ },
+ {
+ "epoch": 0.5002037962862641,
+ "grad_norm": 23856.318359375,
+ "learning_rate": 6.478997774789119e-05,
+ "loss": 0.4314,
+ "step": 96950
+ },
+ {
+ "epoch": 0.500461766268877,
+ "grad_norm": 21834.822265625,
+ "learning_rate": 6.475507837948096e-05,
+ "loss": 0.4319,
+ "step": 97000
+ },
+ {
+ "epoch": 0.5007197362514898,
+ "grad_norm": 22487.779296875,
+ "learning_rate": 6.472017113509561e-05,
+ "loss": 0.4281,
+ "step": 97050
+ },
+ {
+ "epoch": 0.5009777062341026,
+ "grad_norm": 23955.73046875,
+ "learning_rate": 6.468525603336796e-05,
+ "loss": 0.4324,
+ "step": 97100
+ },
+ {
+ "epoch": 0.5012356762167154,
+ "grad_norm": 23631.203125,
+ "learning_rate": 6.4650333092935e-05,
+ "loss": 0.4333,
+ "step": 97150
+ },
+ {
+ "epoch": 0.5014936461993282,
+ "grad_norm": 21347.26953125,
+ "learning_rate": 6.461540233243792e-05,
+ "loss": 0.421,
+ "step": 97200
+ },
+ {
+ "epoch": 0.5017516161819411,
+ "grad_norm": 23590.9140625,
+ "learning_rate": 6.458046377052209e-05,
+ "loss": 0.4347,
+ "step": 97250
+ },
+ {
+ "epoch": 0.5020095861645539,
+ "grad_norm": 23192.708984375,
+ "learning_rate": 6.454551742583703e-05,
+ "loss": 0.4363,
+ "step": 97300
+ },
+ {
+ "epoch": 0.5022675561471667,
+ "grad_norm": 23588.974609375,
+ "learning_rate": 6.451056331703643e-05,
+ "loss": 0.4268,
+ "step": 97350
+ },
+ {
+ "epoch": 0.5025255261297795,
+ "grad_norm": 19536.3046875,
+ "learning_rate": 6.44756014627781e-05,
+ "loss": 0.4268,
+ "step": 97400
+ },
+ {
+ "epoch": 0.5027834961123924,
+ "grad_norm": 20248.345703125,
+ "learning_rate": 6.444063188172401e-05,
+ "loss": 0.4286,
+ "step": 97450
+ },
+ {
+ "epoch": 0.5030414660950052,
+ "grad_norm": 21598.1171875,
+ "learning_rate": 6.440565459254027e-05,
+ "loss": 0.4302,
+ "step": 97500
+ },
+ {
+ "epoch": 0.503299436077618,
+ "grad_norm": 25492.541015625,
+ "learning_rate": 6.437066961389704e-05,
+ "loss": 0.4223,
+ "step": 97550
+ },
+ {
+ "epoch": 0.5035574060602308,
+ "grad_norm": 22227.8125,
+ "learning_rate": 6.433567696446865e-05,
+ "loss": 0.4194,
+ "step": 97600
+ },
+ {
+ "epoch": 0.5038153760428437,
+ "grad_norm": 23799.134765625,
+ "learning_rate": 6.430067666293348e-05,
+ "loss": 0.4239,
+ "step": 97650
+ },
+ {
+ "epoch": 0.5040733460254565,
+ "grad_norm": 25147.080078125,
+ "learning_rate": 6.426566872797403e-05,
+ "loss": 0.4369,
+ "step": 97700
+ },
+ {
+ "epoch": 0.5043313160080694,
+ "grad_norm": 22497.68359375,
+ "learning_rate": 6.423065317827686e-05,
+ "loss": 0.4332,
+ "step": 97750
+ },
+ {
+ "epoch": 0.5045892859906821,
+ "grad_norm": 23273.966796875,
+ "learning_rate": 6.419563003253258e-05,
+ "loss": 0.4331,
+ "step": 97800
+ },
+ {
+ "epoch": 0.5048472559732949,
+ "grad_norm": 21943.7734375,
+ "learning_rate": 6.416059930943585e-05,
+ "loss": 0.4331,
+ "step": 97850
+ },
+ {
+ "epoch": 0.5051052259559078,
+ "grad_norm": 23134.685546875,
+ "learning_rate": 6.412556102768544e-05,
+ "loss": 0.4283,
+ "step": 97900
+ },
+ {
+ "epoch": 0.5053631959385206,
+ "grad_norm": 21504.177734375,
+ "learning_rate": 6.409051520598405e-05,
+ "loss": 0.4319,
+ "step": 97950
+ },
+ {
+ "epoch": 0.5056211659211334,
+ "grad_norm": 25481.029296875,
+ "learning_rate": 6.405546186303852e-05,
+ "loss": 0.4268,
+ "step": 98000
+ },
+ {
+ "epoch": 0.5058791359037462,
+ "grad_norm": 21170.70703125,
+ "learning_rate": 6.402040101755961e-05,
+ "loss": 0.4253,
+ "step": 98050
+ },
+ {
+ "epoch": 0.5061371058863591,
+ "grad_norm": 20005.333984375,
+ "learning_rate": 6.398533268826212e-05,
+ "loss": 0.4267,
+ "step": 98100
+ },
+ {
+ "epoch": 0.5063950758689719,
+ "grad_norm": 20913.32421875,
+ "learning_rate": 6.395025689386485e-05,
+ "loss": 0.4245,
+ "step": 98150
+ },
+ {
+ "epoch": 0.5066530458515847,
+ "grad_norm": 24310.720703125,
+ "learning_rate": 6.391517365309059e-05,
+ "loss": 0.4246,
+ "step": 98200
+ },
+ {
+ "epoch": 0.5069110158341975,
+ "grad_norm": 21981.455078125,
+ "learning_rate": 6.388008298466607e-05,
+ "loss": 0.4286,
+ "step": 98250
+ },
+ {
+ "epoch": 0.5071689858168104,
+ "grad_norm": 23764.30078125,
+ "learning_rate": 6.384498490732202e-05,
+ "loss": 0.4282,
+ "step": 98300
+ },
+ {
+ "epoch": 0.5074269557994232,
+ "grad_norm": 20518.447265625,
+ "learning_rate": 6.380987943979314e-05,
+ "loss": 0.4333,
+ "step": 98350
+ },
+ {
+ "epoch": 0.5076849257820361,
+ "grad_norm": 23327.80859375,
+ "learning_rate": 6.377476660081803e-05,
+ "loss": 0.4255,
+ "step": 98400
+ },
+ {
+ "epoch": 0.5079428957646488,
+ "grad_norm": 19600.84375,
+ "learning_rate": 6.373964640913924e-05,
+ "loss": 0.4277,
+ "step": 98450
+ },
+ {
+ "epoch": 0.5082008657472616,
+ "grad_norm": 23252.146484375,
+ "learning_rate": 6.370451888350322e-05,
+ "loss": 0.4311,
+ "step": 98500
+ },
+ {
+ "epoch": 0.5084588357298745,
+ "grad_norm": 21930.736328125,
+ "learning_rate": 6.366938404266041e-05,
+ "loss": 0.4329,
+ "step": 98550
+ },
+ {
+ "epoch": 0.5087168057124873,
+ "grad_norm": 21249.69140625,
+ "learning_rate": 6.36342419053651e-05,
+ "loss": 0.4257,
+ "step": 98600
+ },
+ {
+ "epoch": 0.5089747756951001,
+ "grad_norm": 21809.4609375,
+ "learning_rate": 6.359909249037548e-05,
+ "loss": 0.431,
+ "step": 98650
+ },
+ {
+ "epoch": 0.5092327456777129,
+ "grad_norm": 23142.6796875,
+ "learning_rate": 6.356393581645359e-05,
+ "loss": 0.4329,
+ "step": 98700
+ },
+ {
+ "epoch": 0.5094907156603258,
+ "grad_norm": 21783.541015625,
+ "learning_rate": 6.352877190236542e-05,
+ "loss": 0.4362,
+ "step": 98750
+ },
+ {
+ "epoch": 0.5097486856429386,
+ "grad_norm": 22534.080078125,
+ "learning_rate": 6.349360076688079e-05,
+ "loss": 0.4302,
+ "step": 98800
+ },
+ {
+ "epoch": 0.5100066556255514,
+ "grad_norm": 22630.03515625,
+ "learning_rate": 6.345842242877336e-05,
+ "loss": 0.4314,
+ "step": 98850
+ },
+ {
+ "epoch": 0.5102646256081642,
+ "grad_norm": 23446.0390625,
+ "learning_rate": 6.342323690682064e-05,
+ "loss": 0.428,
+ "step": 98900
+ },
+ {
+ "epoch": 0.5105225955907771,
+ "grad_norm": 25644.2734375,
+ "learning_rate": 6.338804421980398e-05,
+ "loss": 0.4219,
+ "step": 98950
+ },
+ {
+ "epoch": 0.5107805655733899,
+ "grad_norm": 23159.580078125,
+ "learning_rate": 6.335284438650856e-05,
+ "loss": 0.434,
+ "step": 99000
+ },
+ {
+ "epoch": 0.5110385355560026,
+ "grad_norm": 23536.556640625,
+ "learning_rate": 6.331763742572337e-05,
+ "loss": 0.4293,
+ "step": 99050
+ },
+ {
+ "epoch": 0.5112965055386155,
+ "grad_norm": 23240.662109375,
+ "learning_rate": 6.328242335624121e-05,
+ "loss": 0.434,
+ "step": 99100
+ },
+ {
+ "epoch": 0.5115544755212283,
+ "grad_norm": 22368.94921875,
+ "learning_rate": 6.324720219685866e-05,
+ "loss": 0.4295,
+ "step": 99150
+ },
+ {
+ "epoch": 0.5118124455038412,
+ "grad_norm": 23257.068359375,
+ "learning_rate": 6.321197396637608e-05,
+ "loss": 0.4198,
+ "step": 99200
+ },
+ {
+ "epoch": 0.512070415486454,
+ "grad_norm": 21806.6953125,
+ "learning_rate": 6.317673868359765e-05,
+ "loss": 0.4241,
+ "step": 99250
+ },
+ {
+ "epoch": 0.5123283854690668,
+ "grad_norm": 24117.416015625,
+ "learning_rate": 6.314149636733125e-05,
+ "loss": 0.4261,
+ "step": 99300
+ },
+ {
+ "epoch": 0.5125863554516796,
+ "grad_norm": 25262.626953125,
+ "learning_rate": 6.310624703638858e-05,
+ "loss": 0.4234,
+ "step": 99350
+ },
+ {
+ "epoch": 0.5128443254342925,
+ "grad_norm": 22739.923828125,
+ "learning_rate": 6.3070990709585e-05,
+ "loss": 0.4299,
+ "step": 99400
+ },
+ {
+ "epoch": 0.5131022954169053,
+ "grad_norm": 20651.646484375,
+ "learning_rate": 6.303572740573971e-05,
+ "loss": 0.4307,
+ "step": 99450
+ },
+ {
+ "epoch": 0.5133602653995181,
+ "grad_norm": 22125.037109375,
+ "learning_rate": 6.300045714367555e-05,
+ "loss": 0.4216,
+ "step": 99500
+ },
+ {
+ "epoch": 0.5136182353821309,
+ "grad_norm": 22210.080078125,
+ "learning_rate": 6.29651799422191e-05,
+ "loss": 0.429,
+ "step": 99550
+ },
+ {
+ "epoch": 0.5138762053647438,
+ "grad_norm": 23850.673828125,
+ "learning_rate": 6.292989582020063e-05,
+ "loss": 0.4337,
+ "step": 99600
+ },
+ {
+ "epoch": 0.5141341753473566,
+ "grad_norm": 21346.251953125,
+ "learning_rate": 6.289460479645417e-05,
+ "loss": 0.4352,
+ "step": 99650
+ },
+ {
+ "epoch": 0.5143921453299694,
+ "grad_norm": 22687.080078125,
+ "learning_rate": 6.285930688981735e-05,
+ "loss": 0.433,
+ "step": 99700
+ },
+ {
+ "epoch": 0.5146501153125822,
+ "grad_norm": 20447.666015625,
+ "learning_rate": 6.282400211913154e-05,
+ "loss": 0.4288,
+ "step": 99750
+ },
+ {
+ "epoch": 0.514908085295195,
+ "grad_norm": 21768.51953125,
+ "learning_rate": 6.278869050324168e-05,
+ "loss": 0.4363,
+ "step": 99800
+ },
+ {
+ "epoch": 0.5151660552778079,
+ "grad_norm": 21896.47265625,
+ "learning_rate": 6.27533720609965e-05,
+ "loss": 0.4307,
+ "step": 99850
+ },
+ {
+ "epoch": 0.5154240252604207,
+ "grad_norm": 22967.384765625,
+ "learning_rate": 6.271804681124827e-05,
+ "loss": 0.4295,
+ "step": 99900
+ },
+ {
+ "epoch": 0.5156819952430335,
+ "grad_norm": 20233.869140625,
+ "learning_rate": 6.268271477285292e-05,
+ "loss": 0.4329,
+ "step": 99950
+ },
+ {
+ "epoch": 0.5159399652256463,
+ "grad_norm": 20550.060546875,
+ "learning_rate": 6.264737596466998e-05,
+ "loss": 0.4267,
+ "step": 100000
+ },
+ {
+ "epoch": 0.5159399652256463,
+ "eval_loss": 0.4161209166049957,
+ "eval_runtime": 2887.0736,
+ "eval_samples_per_second": 1074.14,
+ "eval_steps_per_second": 2.098,
+ "step": 100000
+ },
+ {
+ "epoch": 0.5161979352082592,
+ "grad_norm": 22327.767578125,
+ "learning_rate": 6.261203040556267e-05,
+ "loss": 0.4272,
+ "step": 100050
+ },
+ {
+ "epoch": 0.516455905190872,
+ "grad_norm": 22512.1640625,
+ "learning_rate": 6.257667811439776e-05,
+ "loss": 0.4267,
+ "step": 100100
+ },
+ {
+ "epoch": 0.5167138751734848,
+ "grad_norm": 22710.8828125,
+ "learning_rate": 6.254131911004561e-05,
+ "loss": 0.42,
+ "step": 100150
+ },
+ {
+ "epoch": 0.5169718451560976,
+ "grad_norm": 21731.365234375,
+ "learning_rate": 6.250595341138014e-05,
+ "loss": 0.4259,
+ "step": 100200
+ },
+ {
+ "epoch": 0.5172298151387105,
+ "grad_norm": 21478.970703125,
+ "learning_rate": 6.247058103727892e-05,
+ "loss": 0.4217,
+ "step": 100250
+ },
+ {
+ "epoch": 0.5174877851213233,
+ "grad_norm": 22431.939453125,
+ "learning_rate": 6.243520200662303e-05,
+ "loss": 0.4272,
+ "step": 100300
+ },
+ {
+ "epoch": 0.5177457551039361,
+ "grad_norm": 22137.5078125,
+ "learning_rate": 6.239981633829709e-05,
+ "loss": 0.4301,
+ "step": 100350
+ },
+ {
+ "epoch": 0.5180037250865489,
+ "grad_norm": 22802.220703125,
+ "learning_rate": 6.23644240511893e-05,
+ "loss": 0.4346,
+ "step": 100400
+ },
+ {
+ "epoch": 0.5182616950691618,
+ "grad_norm": 20567.640625,
+ "learning_rate": 6.232902516419137e-05,
+ "loss": 0.4271,
+ "step": 100450
+ },
+ {
+ "epoch": 0.5185196650517746,
+ "grad_norm": 20855.70703125,
+ "learning_rate": 6.229361969619855e-05,
+ "loss": 0.4237,
+ "step": 100500
+ },
+ {
+ "epoch": 0.5187776350343875,
+ "grad_norm": 22052.44921875,
+ "learning_rate": 6.225820766610958e-05,
+ "loss": 0.4324,
+ "step": 100550
+ },
+ {
+ "epoch": 0.5190356050170002,
+ "grad_norm": 21984.818359375,
+ "learning_rate": 6.222278909282674e-05,
+ "loss": 0.4315,
+ "step": 100600
+ },
+ {
+ "epoch": 0.519293574999613,
+ "grad_norm": 22044.8359375,
+ "learning_rate": 6.218736399525575e-05,
+ "loss": 0.4324,
+ "step": 100650
+ },
+ {
+ "epoch": 0.5195515449822259,
+ "grad_norm": 22661.78515625,
+ "learning_rate": 6.215193239230586e-05,
+ "loss": 0.4273,
+ "step": 100700
+ },
+ {
+ "epoch": 0.5198095149648387,
+ "grad_norm": 22091.01171875,
+ "learning_rate": 6.211649430288976e-05,
+ "loss": 0.4252,
+ "step": 100750
+ },
+ {
+ "epoch": 0.5200674849474515,
+ "grad_norm": 22164.376953125,
+ "learning_rate": 6.208104974592364e-05,
+ "loss": 0.4272,
+ "step": 100800
+ },
+ {
+ "epoch": 0.5203254549300643,
+ "grad_norm": 23387.287109375,
+ "learning_rate": 6.20455987403271e-05,
+ "loss": 0.4281,
+ "step": 100850
+ },
+ {
+ "epoch": 0.5205834249126772,
+ "grad_norm": 22505.326171875,
+ "learning_rate": 6.201014130502317e-05,
+ "loss": 0.4285,
+ "step": 100900
+ },
+ {
+ "epoch": 0.52084139489529,
+ "grad_norm": 21150.341796875,
+ "learning_rate": 6.19746774589384e-05,
+ "loss": 0.4274,
+ "step": 100950
+ },
+ {
+ "epoch": 0.5210993648779028,
+ "grad_norm": 23076.650390625,
+ "learning_rate": 6.193920722100268e-05,
+ "loss": 0.4289,
+ "step": 101000
+ },
+ {
+ "epoch": 0.5213573348605156,
+ "grad_norm": 20890.41796875,
+ "learning_rate": 6.190373061014932e-05,
+ "loss": 0.4305,
+ "step": 101050
+ },
+ {
+ "epoch": 0.5216153048431285,
+ "grad_norm": 22231.6328125,
+ "learning_rate": 6.186824764531507e-05,
+ "loss": 0.4304,
+ "step": 101100
+ },
+ {
+ "epoch": 0.5218732748257413,
+ "grad_norm": 22094.197265625,
+ "learning_rate": 6.183275834544005e-05,
+ "loss": 0.4279,
+ "step": 101150
+ },
+ {
+ "epoch": 0.522131244808354,
+ "grad_norm": 23188.353515625,
+ "learning_rate": 6.179726272946774e-05,
+ "loss": 0.4272,
+ "step": 101200
+ },
+ {
+ "epoch": 0.5223892147909669,
+ "grad_norm": 22908.5,
+ "learning_rate": 6.176176081634504e-05,
+ "loss": 0.4229,
+ "step": 101250
+ },
+ {
+ "epoch": 0.5226471847735797,
+ "grad_norm": 21536.37109375,
+ "learning_rate": 6.172625262502215e-05,
+ "loss": 0.4267,
+ "step": 101300
+ },
+ {
+ "epoch": 0.5229051547561926,
+ "grad_norm": 22923.38671875,
+ "learning_rate": 6.169073817445268e-05,
+ "loss": 0.4256,
+ "step": 101350
+ },
+ {
+ "epoch": 0.5231631247388054,
+ "grad_norm": 22802.669921875,
+ "learning_rate": 6.165521748359356e-05,
+ "loss": 0.4241,
+ "step": 101400
+ },
+ {
+ "epoch": 0.5234210947214182,
+ "grad_norm": 22852.59765625,
+ "learning_rate": 6.161969057140504e-05,
+ "loss": 0.4275,
+ "step": 101450
+ },
+ {
+ "epoch": 0.523679064704031,
+ "grad_norm": 27410.056640625,
+ "learning_rate": 6.158415745685068e-05,
+ "loss": 0.4316,
+ "step": 101500
+ },
+ {
+ "epoch": 0.5239370346866439,
+ "grad_norm": 21783.482421875,
+ "learning_rate": 6.15486181588974e-05,
+ "loss": 0.4235,
+ "step": 101550
+ },
+ {
+ "epoch": 0.5241950046692567,
+ "grad_norm": 21013.259765625,
+ "learning_rate": 6.151307269651536e-05,
+ "loss": 0.426,
+ "step": 101600
+ },
+ {
+ "epoch": 0.5244529746518695,
+ "grad_norm": 23852.673828125,
+ "learning_rate": 6.147752108867807e-05,
+ "loss": 0.4226,
+ "step": 101650
+ },
+ {
+ "epoch": 0.5247109446344823,
+ "grad_norm": 24846.427734375,
+ "learning_rate": 6.144196335436225e-05,
+ "loss": 0.4277,
+ "step": 101700
+ },
+ {
+ "epoch": 0.5249689146170952,
+ "grad_norm": 21197.177734375,
+ "learning_rate": 6.140639951254796e-05,
+ "loss": 0.4247,
+ "step": 101750
+ },
+ {
+ "epoch": 0.525226884599708,
+ "grad_norm": 24620.37890625,
+ "learning_rate": 6.137082958221848e-05,
+ "loss": 0.429,
+ "step": 101800
+ },
+ {
+ "epoch": 0.5254848545823207,
+ "grad_norm": 22811.875,
+ "learning_rate": 6.133525358236036e-05,
+ "loss": 0.4274,
+ "step": 101850
+ },
+ {
+ "epoch": 0.5257428245649336,
+ "grad_norm": 20224.125,
+ "learning_rate": 6.129967153196336e-05,
+ "loss": 0.4338,
+ "step": 101900
+ },
+ {
+ "epoch": 0.5260007945475464,
+ "grad_norm": 21489.734375,
+ "learning_rate": 6.126408345002052e-05,
+ "loss": 0.4333,
+ "step": 101950
+ },
+ {
+ "epoch": 0.5262587645301593,
+ "grad_norm": 21771.20703125,
+ "learning_rate": 6.122848935552804e-05,
+ "loss": 0.4258,
+ "step": 102000
+ },
+ {
+ "epoch": 0.5265167345127721,
+ "grad_norm": 23362.43359375,
+ "learning_rate": 6.119288926748537e-05,
+ "loss": 0.4234,
+ "step": 102050
+ },
+ {
+ "epoch": 0.5267747044953849,
+ "grad_norm": 20869.46484375,
+ "learning_rate": 6.115728320489516e-05,
+ "loss": 0.4233,
+ "step": 102100
+ },
+ {
+ "epoch": 0.5270326744779977,
+ "grad_norm": 21146.568359375,
+ "learning_rate": 6.11216711867632e-05,
+ "loss": 0.4243,
+ "step": 102150
+ },
+ {
+ "epoch": 0.5272906444606106,
+ "grad_norm": 24031.97265625,
+ "learning_rate": 6.108605323209853e-05,
+ "loss": 0.4334,
+ "step": 102200
+ },
+ {
+ "epoch": 0.5275486144432234,
+ "grad_norm": 23461.306640625,
+ "learning_rate": 6.10504293599133e-05,
+ "loss": 0.4289,
+ "step": 102250
+ },
+ {
+ "epoch": 0.5278065844258362,
+ "grad_norm": 21013.169921875,
+ "learning_rate": 6.101479958922287e-05,
+ "loss": 0.4334,
+ "step": 102300
+ },
+ {
+ "epoch": 0.528064554408449,
+ "grad_norm": 23328.306640625,
+ "learning_rate": 6.0979163939045716e-05,
+ "loss": 0.4285,
+ "step": 102350
+ },
+ {
+ "epoch": 0.5283225243910619,
+ "grad_norm": 21542.20703125,
+ "learning_rate": 6.094352242840343e-05,
+ "loss": 0.4321,
+ "step": 102400
+ },
+ {
+ "epoch": 0.5285804943736747,
+ "grad_norm": 20556.357421875,
+ "learning_rate": 6.09078750763208e-05,
+ "loss": 0.4255,
+ "step": 102450
+ },
+ {
+ "epoch": 0.5288384643562875,
+ "grad_norm": 24925.21875,
+ "learning_rate": 6.0872221901825666e-05,
+ "loss": 0.4225,
+ "step": 102500
+ },
+ {
+ "epoch": 0.5290964343389003,
+ "grad_norm": 22750.419921875,
+ "learning_rate": 6.0836562923949016e-05,
+ "loss": 0.4287,
+ "step": 102550
+ },
+ {
+ "epoch": 0.5293544043215132,
+ "grad_norm": 21514.8984375,
+ "learning_rate": 6.080089816172489e-05,
+ "loss": 0.4254,
+ "step": 102600
+ },
+ {
+ "epoch": 0.529612374304126,
+ "grad_norm": 23347.03125,
+ "learning_rate": 6.07652276341905e-05,
+ "loss": 0.4346,
+ "step": 102650
+ },
+ {
+ "epoch": 0.5298703442867388,
+ "grad_norm": 23180.916015625,
+ "learning_rate": 6.072955136038604e-05,
+ "loss": 0.4244,
+ "step": 102700
+ },
+ {
+ "epoch": 0.5301283142693516,
+ "grad_norm": 20701.431640625,
+ "learning_rate": 6.069386935935484e-05,
+ "loss": 0.43,
+ "step": 102750
+ },
+ {
+ "epoch": 0.5303862842519644,
+ "grad_norm": 23350.99609375,
+ "learning_rate": 6.0658181650143245e-05,
+ "loss": 0.4217,
+ "step": 102800
+ },
+ {
+ "epoch": 0.5306442542345773,
+ "grad_norm": 21068.111328125,
+ "learning_rate": 6.062248825180066e-05,
+ "loss": 0.4278,
+ "step": 102850
+ },
+ {
+ "epoch": 0.5309022242171901,
+ "grad_norm": 23415.25,
+ "learning_rate": 6.0586789183379554e-05,
+ "loss": 0.4331,
+ "step": 102900
+ },
+ {
+ "epoch": 0.5311601941998029,
+ "grad_norm": 22186.048828125,
+ "learning_rate": 6.055108446393538e-05,
+ "loss": 0.4327,
+ "step": 102950
+ },
+ {
+ "epoch": 0.5314181641824157,
+ "grad_norm": 20644.166015625,
+ "learning_rate": 6.051537411252662e-05,
+ "loss": 0.4264,
+ "step": 103000
+ },
+ {
+ "epoch": 0.5316761341650286,
+ "grad_norm": 21755.712890625,
+ "learning_rate": 6.047965814821478e-05,
+ "loss": 0.4253,
+ "step": 103050
+ },
+ {
+ "epoch": 0.5319341041476414,
+ "grad_norm": 22319.177734375,
+ "learning_rate": 6.044393659006435e-05,
+ "loss": 0.4238,
+ "step": 103100
+ },
+ {
+ "epoch": 0.5321920741302542,
+ "grad_norm": 22544.064453125,
+ "learning_rate": 6.040820945714281e-05,
+ "loss": 0.4306,
+ "step": 103150
+ },
+ {
+ "epoch": 0.532450044112867,
+ "grad_norm": 21484.53125,
+ "learning_rate": 6.037247676852059e-05,
+ "loss": 0.4254,
+ "step": 103200
+ },
+ {
+ "epoch": 0.5327080140954799,
+ "grad_norm": 23923.201171875,
+ "learning_rate": 6.033673854327114e-05,
+ "loss": 0.4258,
+ "step": 103250
+ },
+ {
+ "epoch": 0.5329659840780927,
+ "grad_norm": 20412.08984375,
+ "learning_rate": 6.03009948004708e-05,
+ "loss": 0.4286,
+ "step": 103300
+ },
+ {
+ "epoch": 0.5332239540607056,
+ "grad_norm": 19932.908203125,
+ "learning_rate": 6.026524555919891e-05,
+ "loss": 0.4367,
+ "step": 103350
+ },
+ {
+ "epoch": 0.5334819240433183,
+ "grad_norm": 21761.033203125,
+ "learning_rate": 6.022949083853772e-05,
+ "loss": 0.4272,
+ "step": 103400
+ },
+ {
+ "epoch": 0.5337398940259311,
+ "grad_norm": 23392.29296875,
+ "learning_rate": 6.019373065757239e-05,
+ "loss": 0.4274,
+ "step": 103450
+ },
+ {
+ "epoch": 0.533997864008544,
+ "grad_norm": 26151.69921875,
+ "learning_rate": 6.015796503539103e-05,
+ "loss": 0.4189,
+ "step": 103500
+ },
+ {
+ "epoch": 0.5342558339911568,
+ "grad_norm": 22503.529296875,
+ "learning_rate": 6.012219399108463e-05,
+ "loss": 0.428,
+ "step": 103550
+ },
+ {
+ "epoch": 0.5345138039737696,
+ "grad_norm": 25906.685546875,
+ "learning_rate": 6.008641754374709e-05,
+ "loss": 0.4287,
+ "step": 103600
+ },
+ {
+ "epoch": 0.5347717739563824,
+ "grad_norm": 23784.685546875,
+ "learning_rate": 6.005063571247517e-05,
+ "loss": 0.4276,
+ "step": 103650
+ },
+ {
+ "epoch": 0.5350297439389953,
+ "grad_norm": 21574.30078125,
+ "learning_rate": 6.0014848516368515e-05,
+ "loss": 0.4344,
+ "step": 103700
+ },
+ {
+ "epoch": 0.5352877139216081,
+ "grad_norm": 22296.921875,
+ "learning_rate": 5.9979055974529675e-05,
+ "loss": 0.4322,
+ "step": 103750
+ },
+ {
+ "epoch": 0.5355456839042209,
+ "grad_norm": 21478.611328125,
+ "learning_rate": 5.994325810606397e-05,
+ "loss": 0.429,
+ "step": 103800
+ },
+ {
+ "epoch": 0.5358036538868337,
+ "grad_norm": 22572.37109375,
+ "learning_rate": 5.9907454930079645e-05,
+ "loss": 0.4281,
+ "step": 103850
+ },
+ {
+ "epoch": 0.5360616238694466,
+ "grad_norm": 23416.80859375,
+ "learning_rate": 5.98716464656877e-05,
+ "loss": 0.4266,
+ "step": 103900
+ },
+ {
+ "epoch": 0.5363195938520594,
+ "grad_norm": 23470.626953125,
+ "learning_rate": 5.983583273200204e-05,
+ "loss": 0.426,
+ "step": 103950
+ },
+ {
+ "epoch": 0.5365775638346721,
+ "grad_norm": 24464.38671875,
+ "learning_rate": 5.980001374813933e-05,
+ "loss": 0.4218,
+ "step": 104000
+ },
+ {
+ "epoch": 0.536835533817285,
+ "grad_norm": 23835.29296875,
+ "learning_rate": 5.976418953321904e-05,
+ "loss": 0.4261,
+ "step": 104050
+ },
+ {
+ "epoch": 0.5370935037998978,
+ "grad_norm": 23344.654296875,
+ "learning_rate": 5.972836010636346e-05,
+ "loss": 0.4292,
+ "step": 104100
+ },
+ {
+ "epoch": 0.5373514737825107,
+ "grad_norm": 23925.935546875,
+ "learning_rate": 5.9692525486697616e-05,
+ "loss": 0.4323,
+ "step": 104150
+ },
+ {
+ "epoch": 0.5376094437651235,
+ "grad_norm": 23155.76953125,
+ "learning_rate": 5.965668569334937e-05,
+ "loss": 0.428,
+ "step": 104200
+ },
+ {
+ "epoch": 0.5378674137477363,
+ "grad_norm": 22334.19921875,
+ "learning_rate": 5.962084074544928e-05,
+ "loss": 0.4129,
+ "step": 104250
+ },
+ {
+ "epoch": 0.5381253837303491,
+ "grad_norm": 20239.66796875,
+ "learning_rate": 5.95849906621307e-05,
+ "loss": 0.4335,
+ "step": 104300
+ },
+ {
+ "epoch": 0.538383353712962,
+ "grad_norm": 22626.19140625,
+ "learning_rate": 5.9549135462529704e-05,
+ "loss": 0.4274,
+ "step": 104350
+ },
+ {
+ "epoch": 0.5386413236955748,
+ "grad_norm": 21798.65625,
+ "learning_rate": 5.951327516578512e-05,
+ "loss": 0.4258,
+ "step": 104400
+ },
+ {
+ "epoch": 0.5388992936781876,
+ "grad_norm": 21796.7421875,
+ "learning_rate": 5.947740979103845e-05,
+ "loss": 0.4263,
+ "step": 104450
+ },
+ {
+ "epoch": 0.5391572636608004,
+ "grad_norm": 22380.21484375,
+ "learning_rate": 5.944153935743396e-05,
+ "loss": 0.4218,
+ "step": 104500
+ },
+ {
+ "epoch": 0.5394152336434133,
+ "grad_norm": 22526.4296875,
+ "learning_rate": 5.940566388411859e-05,
+ "loss": 0.4233,
+ "step": 104550
+ },
+ {
+ "epoch": 0.5396732036260261,
+ "grad_norm": 22876.5703125,
+ "learning_rate": 5.936978339024195e-05,
+ "loss": 0.4296,
+ "step": 104600
+ },
+ {
+ "epoch": 0.5399311736086388,
+ "grad_norm": 22592.654296875,
+ "learning_rate": 5.9333897894956394e-05,
+ "loss": 0.4287,
+ "step": 104650
+ },
+ {
+ "epoch": 0.5401891435912517,
+ "grad_norm": 21235.43359375,
+ "learning_rate": 5.929800741741688e-05,
+ "loss": 0.4269,
+ "step": 104700
+ },
+ {
+ "epoch": 0.5404471135738645,
+ "grad_norm": 22049.05859375,
+ "learning_rate": 5.926211197678104e-05,
+ "loss": 0.4266,
+ "step": 104750
+ },
+ {
+ "epoch": 0.5407050835564774,
+ "grad_norm": 23252.845703125,
+ "learning_rate": 5.922621159220918e-05,
+ "loss": 0.4223,
+ "step": 104800
+ },
+ {
+ "epoch": 0.5409630535390902,
+ "grad_norm": 20577.1796875,
+ "learning_rate": 5.919030628286424e-05,
+ "loss": 0.4302,
+ "step": 104850
+ },
+ {
+ "epoch": 0.541221023521703,
+ "grad_norm": 24854.8671875,
+ "learning_rate": 5.915439606791174e-05,
+ "loss": 0.4212,
+ "step": 104900
+ },
+ {
+ "epoch": 0.5414789935043158,
+ "grad_norm": 22561.552734375,
+ "learning_rate": 5.9118480966519906e-05,
+ "loss": 0.4196,
+ "step": 104950
+ },
+ {
+ "epoch": 0.5417369634869287,
+ "grad_norm": 23885.4765625,
+ "learning_rate": 5.9082560997859496e-05,
+ "loss": 0.421,
+ "step": 105000
+ },
+ {
+ "epoch": 0.5417369634869287,
+ "eval_loss": 0.4132173955440521,
+ "eval_runtime": 2876.3365,
+ "eval_samples_per_second": 1078.149,
+ "eval_steps_per_second": 2.106,
+ "step": 105000
+ },
+ {
+ "epoch": 0.5419949334695415,
+ "grad_norm": 20974.994140625,
+ "learning_rate": 5.90466361811039e-05,
+ "loss": 0.4228,
+ "step": 105050
+ },
+ {
+ "epoch": 0.5422529034521543,
+ "grad_norm": 24338.412109375,
+ "learning_rate": 5.9010706535429086e-05,
+ "loss": 0.4215,
+ "step": 105100
+ },
+ {
+ "epoch": 0.5425108734347671,
+ "grad_norm": 20734.796875,
+ "learning_rate": 5.8974772080013605e-05,
+ "loss": 0.4319,
+ "step": 105150
+ },
+ {
+ "epoch": 0.54276884341738,
+ "grad_norm": 21026.123046875,
+ "learning_rate": 5.8938832834038574e-05,
+ "loss": 0.4318,
+ "step": 105200
+ },
+ {
+ "epoch": 0.5430268133999928,
+ "grad_norm": 20023.287109375,
+ "learning_rate": 5.890288881668766e-05,
+ "loss": 0.4306,
+ "step": 105250
+ },
+ {
+ "epoch": 0.5432847833826056,
+ "grad_norm": 23171.42578125,
+ "learning_rate": 5.88669400471471e-05,
+ "loss": 0.4237,
+ "step": 105300
+ },
+ {
+ "epoch": 0.5435427533652184,
+ "grad_norm": 21692.109375,
+ "learning_rate": 5.8830986544605635e-05,
+ "loss": 0.4261,
+ "step": 105350
+ },
+ {
+ "epoch": 0.5438007233478312,
+ "grad_norm": 22358.216796875,
+ "learning_rate": 5.8795028328254566e-05,
+ "loss": 0.4204,
+ "step": 105400
+ },
+ {
+ "epoch": 0.5440586933304441,
+ "grad_norm": 22529.650390625,
+ "learning_rate": 5.875906541728766e-05,
+ "loss": 0.422,
+ "step": 105450
+ },
+ {
+ "epoch": 0.544316663313057,
+ "grad_norm": 18307.05859375,
+ "learning_rate": 5.8723097830901264e-05,
+ "loss": 0.4236,
+ "step": 105500
+ },
+ {
+ "epoch": 0.5445746332956697,
+ "grad_norm": 22356.583984375,
+ "learning_rate": 5.8687125588294154e-05,
+ "loss": 0.4213,
+ "step": 105550
+ },
+ {
+ "epoch": 0.5448326032782825,
+ "grad_norm": 21446.732421875,
+ "learning_rate": 5.8651148708667625e-05,
+ "loss": 0.4216,
+ "step": 105600
+ },
+ {
+ "epoch": 0.5450905732608954,
+ "grad_norm": 24014.49609375,
+ "learning_rate": 5.8615167211225416e-05,
+ "loss": 0.4283,
+ "step": 105650
+ },
+ {
+ "epoch": 0.5453485432435082,
+ "grad_norm": 22394.306640625,
+ "learning_rate": 5.8579181115173785e-05,
+ "loss": 0.4242,
+ "step": 105700
+ },
+ {
+ "epoch": 0.545606513226121,
+ "grad_norm": 25348.26171875,
+ "learning_rate": 5.8543190439721405e-05,
+ "loss": 0.4234,
+ "step": 105750
+ },
+ {
+ "epoch": 0.5458644832087338,
+ "grad_norm": 22638.720703125,
+ "learning_rate": 5.850719520407939e-05,
+ "loss": 0.4269,
+ "step": 105800
+ },
+ {
+ "epoch": 0.5461224531913467,
+ "grad_norm": 22702.841796875,
+ "learning_rate": 5.847119542746131e-05,
+ "loss": 0.4201,
+ "step": 105850
+ },
+ {
+ "epoch": 0.5463804231739595,
+ "grad_norm": 22299.849609375,
+ "learning_rate": 5.843519112908315e-05,
+ "loss": 0.4243,
+ "step": 105900
+ },
+ {
+ "epoch": 0.5466383931565723,
+ "grad_norm": 21965.283203125,
+ "learning_rate": 5.8399182328163304e-05,
+ "loss": 0.4209,
+ "step": 105950
+ },
+ {
+ "epoch": 0.5468963631391851,
+ "grad_norm": 22101.755859375,
+ "learning_rate": 5.836316904392256e-05,
+ "loss": 0.4254,
+ "step": 106000
+ },
+ {
+ "epoch": 0.547154333121798,
+ "grad_norm": 22735.970703125,
+ "learning_rate": 5.8327151295584126e-05,
+ "loss": 0.4251,
+ "step": 106050
+ },
+ {
+ "epoch": 0.5474123031044108,
+ "grad_norm": 24287.58203125,
+ "learning_rate": 5.829112910237359e-05,
+ "loss": 0.427,
+ "step": 106100
+ },
+ {
+ "epoch": 0.5476702730870235,
+ "grad_norm": 22509.02734375,
+ "learning_rate": 5.825510248351889e-05,
+ "loss": 0.4209,
+ "step": 106150
+ },
+ {
+ "epoch": 0.5479282430696364,
+ "grad_norm": 22325.32421875,
+ "learning_rate": 5.821907145825032e-05,
+ "loss": 0.4276,
+ "step": 106200
+ },
+ {
+ "epoch": 0.5481862130522492,
+ "grad_norm": 21362.255859375,
+ "learning_rate": 5.8183036045800556e-05,
+ "loss": 0.4273,
+ "step": 106250
+ },
+ {
+ "epoch": 0.5484441830348621,
+ "grad_norm": 22934.61328125,
+ "learning_rate": 5.814699626540461e-05,
+ "loss": 0.4318,
+ "step": 106300
+ },
+ {
+ "epoch": 0.5487021530174749,
+ "grad_norm": 23663.65625,
+ "learning_rate": 5.8110952136299814e-05,
+ "loss": 0.4246,
+ "step": 106350
+ },
+ {
+ "epoch": 0.5489601230000877,
+ "grad_norm": 20743.84765625,
+ "learning_rate": 5.807490367772584e-05,
+ "loss": 0.4289,
+ "step": 106400
+ },
+ {
+ "epoch": 0.5492180929827005,
+ "grad_norm": 20859.244140625,
+ "learning_rate": 5.8038850908924636e-05,
+ "loss": 0.4255,
+ "step": 106450
+ },
+ {
+ "epoch": 0.5494760629653134,
+ "grad_norm": 21824.990234375,
+ "learning_rate": 5.800279384914047e-05,
+ "loss": 0.4311,
+ "step": 106500
+ },
+ {
+ "epoch": 0.5497340329479262,
+ "grad_norm": 19514.681640625,
+ "learning_rate": 5.7966732517619926e-05,
+ "loss": 0.4311,
+ "step": 106550
+ },
+ {
+ "epoch": 0.549992002930539,
+ "grad_norm": 24263.765625,
+ "learning_rate": 5.7930666933611835e-05,
+ "loss": 0.4257,
+ "step": 106600
+ },
+ {
+ "epoch": 0.5502499729131518,
+ "grad_norm": 23152.279296875,
+ "learning_rate": 5.789459711636729e-05,
+ "loss": 0.4226,
+ "step": 106650
+ },
+ {
+ "epoch": 0.5505079428957647,
+ "grad_norm": 21756.8671875,
+ "learning_rate": 5.785852308513967e-05,
+ "loss": 0.4266,
+ "step": 106700
+ },
+ {
+ "epoch": 0.5507659128783775,
+ "grad_norm": 20913.3125,
+ "learning_rate": 5.78224448591846e-05,
+ "loss": 0.4228,
+ "step": 106750
+ },
+ {
+ "epoch": 0.5510238828609902,
+ "grad_norm": 24674.92578125,
+ "learning_rate": 5.778636245775996e-05,
+ "loss": 0.4246,
+ "step": 106800
+ },
+ {
+ "epoch": 0.5512818528436031,
+ "grad_norm": 24229.4296875,
+ "learning_rate": 5.775027590012579e-05,
+ "loss": 0.4244,
+ "step": 106850
+ },
+ {
+ "epoch": 0.5515398228262159,
+ "grad_norm": 21722.048828125,
+ "learning_rate": 5.771418520554443e-05,
+ "loss": 0.4264,
+ "step": 106900
+ },
+ {
+ "epoch": 0.5517977928088288,
+ "grad_norm": 22060.224609375,
+ "learning_rate": 5.7678090393280384e-05,
+ "loss": 0.4268,
+ "step": 106950
+ },
+ {
+ "epoch": 0.5520557627914416,
+ "grad_norm": 25690.306640625,
+ "learning_rate": 5.7641991482600366e-05,
+ "loss": 0.4298,
+ "step": 107000
+ },
+ {
+ "epoch": 0.5523137327740544,
+ "grad_norm": 24629.115234375,
+ "learning_rate": 5.7605888492773266e-05,
+ "loss": 0.4223,
+ "step": 107050
+ },
+ {
+ "epoch": 0.5525717027566672,
+ "grad_norm": 23552.78515625,
+ "learning_rate": 5.756978144307018e-05,
+ "loss": 0.4246,
+ "step": 107100
+ },
+ {
+ "epoch": 0.5528296727392801,
+ "grad_norm": 21611.703125,
+ "learning_rate": 5.753367035276431e-05,
+ "loss": 0.4173,
+ "step": 107150
+ },
+ {
+ "epoch": 0.5530876427218929,
+ "grad_norm": 24158.64453125,
+ "learning_rate": 5.749755524113111e-05,
+ "loss": 0.4211,
+ "step": 107200
+ },
+ {
+ "epoch": 0.5533456127045057,
+ "grad_norm": 23446.94140625,
+ "learning_rate": 5.746143612744811e-05,
+ "loss": 0.4262,
+ "step": 107250
+ },
+ {
+ "epoch": 0.5536035826871185,
+ "grad_norm": 21608.703125,
+ "learning_rate": 5.742531303099498e-05,
+ "loss": 0.424,
+ "step": 107300
+ },
+ {
+ "epoch": 0.5538615526697314,
+ "grad_norm": 25070.78125,
+ "learning_rate": 5.738918597105353e-05,
+ "loss": 0.4219,
+ "step": 107350
+ },
+ {
+ "epoch": 0.5541195226523442,
+ "grad_norm": 21161.5234375,
+ "learning_rate": 5.735305496690769e-05,
+ "loss": 0.4293,
+ "step": 107400
+ },
+ {
+ "epoch": 0.554377492634957,
+ "grad_norm": 23108.521484375,
+ "learning_rate": 5.7316920037843516e-05,
+ "loss": 0.427,
+ "step": 107450
+ },
+ {
+ "epoch": 0.5546354626175698,
+ "grad_norm": 22233.87890625,
+ "learning_rate": 5.728078120314909e-05,
+ "loss": 0.4204,
+ "step": 107500
+ },
+ {
+ "epoch": 0.5548934326001826,
+ "grad_norm": 24522.310546875,
+ "learning_rate": 5.724463848211464e-05,
+ "loss": 0.4257,
+ "step": 107550
+ },
+ {
+ "epoch": 0.5551514025827955,
+ "grad_norm": 22916.892578125,
+ "learning_rate": 5.720849189403244e-05,
+ "loss": 0.4251,
+ "step": 107600
+ },
+ {
+ "epoch": 0.5554093725654083,
+ "grad_norm": 20069.236328125,
+ "learning_rate": 5.7172341458196876e-05,
+ "loss": 0.424,
+ "step": 107650
+ },
+ {
+ "epoch": 0.5556673425480211,
+ "grad_norm": 25173.86328125,
+ "learning_rate": 5.713618719390432e-05,
+ "loss": 0.4294,
+ "step": 107700
+ },
+ {
+ "epoch": 0.5559253125306339,
+ "grad_norm": 21957.373046875,
+ "learning_rate": 5.710002912045323e-05,
+ "loss": 0.4239,
+ "step": 107750
+ },
+ {
+ "epoch": 0.5561832825132468,
+ "grad_norm": 20540.82421875,
+ "learning_rate": 5.706386725714407e-05,
+ "loss": 0.4259,
+ "step": 107800
+ },
+ {
+ "epoch": 0.5564412524958596,
+ "grad_norm": 22470.4921875,
+ "learning_rate": 5.702770162327936e-05,
+ "loss": 0.4273,
+ "step": 107850
+ },
+ {
+ "epoch": 0.5566992224784724,
+ "grad_norm": 21721.197265625,
+ "learning_rate": 5.69915322381636e-05,
+ "loss": 0.4233,
+ "step": 107900
+ },
+ {
+ "epoch": 0.5569571924610852,
+ "grad_norm": 21666.955078125,
+ "learning_rate": 5.6955359121103324e-05,
+ "loss": 0.4283,
+ "step": 107950
+ },
+ {
+ "epoch": 0.5572151624436981,
+ "grad_norm": 20970.9296875,
+ "learning_rate": 5.6919182291407014e-05,
+ "loss": 0.4275,
+ "step": 108000
+ },
+ {
+ "epoch": 0.5574731324263109,
+ "grad_norm": 22353.306640625,
+ "learning_rate": 5.688300176838518e-05,
+ "loss": 0.4244,
+ "step": 108050
+ },
+ {
+ "epoch": 0.5577311024089237,
+ "grad_norm": 22607.357421875,
+ "learning_rate": 5.68468175713503e-05,
+ "loss": 0.4252,
+ "step": 108100
+ },
+ {
+ "epoch": 0.5579890723915365,
+ "grad_norm": 21949.1015625,
+ "learning_rate": 5.681062971961677e-05,
+ "loss": 0.4194,
+ "step": 108150
+ },
+ {
+ "epoch": 0.5582470423741493,
+ "grad_norm": 23135.21875,
+ "learning_rate": 5.677443823250099e-05,
+ "loss": 0.425,
+ "step": 108200
+ },
+ {
+ "epoch": 0.5585050123567622,
+ "grad_norm": 19050.34765625,
+ "learning_rate": 5.673824312932123e-05,
+ "loss": 0.422,
+ "step": 108250
+ },
+ {
+ "epoch": 0.5587629823393749,
+ "grad_norm": 22969.15625,
+ "learning_rate": 5.67020444293978e-05,
+ "loss": 0.4253,
+ "step": 108300
+ },
+ {
+ "epoch": 0.5590209523219878,
+ "grad_norm": 22808.203125,
+ "learning_rate": 5.666584215205282e-05,
+ "loss": 0.4261,
+ "step": 108350
+ },
+ {
+ "epoch": 0.5592789223046006,
+ "grad_norm": 23061.126953125,
+ "learning_rate": 5.662963631661038e-05,
+ "loss": 0.4248,
+ "step": 108400
+ },
+ {
+ "epoch": 0.5595368922872135,
+ "grad_norm": 24134.693359375,
+ "learning_rate": 5.659342694239642e-05,
+ "loss": 0.4273,
+ "step": 108450
+ },
+ {
+ "epoch": 0.5597948622698263,
+ "grad_norm": 23659.2578125,
+ "learning_rate": 5.655721404873886e-05,
+ "loss": 0.427,
+ "step": 108500
+ },
+ {
+ "epoch": 0.5600528322524391,
+ "grad_norm": 20205.1953125,
+ "learning_rate": 5.652099765496741e-05,
+ "loss": 0.4257,
+ "step": 108550
+ },
+ {
+ "epoch": 0.5603108022350519,
+ "grad_norm": 21324.837890625,
+ "learning_rate": 5.6484777780413686e-05,
+ "loss": 0.4248,
+ "step": 108600
+ },
+ {
+ "epoch": 0.5605687722176648,
+ "grad_norm": 21779.849609375,
+ "learning_rate": 5.644855444441114e-05,
+ "loss": 0.4259,
+ "step": 108650
+ },
+ {
+ "epoch": 0.5608267422002776,
+ "grad_norm": 20502.0859375,
+ "learning_rate": 5.641232766629512e-05,
+ "loss": 0.4281,
+ "step": 108700
+ },
+ {
+ "epoch": 0.5610847121828904,
+ "grad_norm": 23600.5859375,
+ "learning_rate": 5.637609746540276e-05,
+ "loss": 0.4183,
+ "step": 108750
+ },
+ {
+ "epoch": 0.5613426821655032,
+ "grad_norm": 22977.41015625,
+ "learning_rate": 5.633986386107302e-05,
+ "loss": 0.4219,
+ "step": 108800
+ },
+ {
+ "epoch": 0.561600652148116,
+ "grad_norm": 23411.263671875,
+ "learning_rate": 5.630362687264672e-05,
+ "loss": 0.4268,
+ "step": 108850
+ },
+ {
+ "epoch": 0.5618586221307289,
+ "grad_norm": 20194.060546875,
+ "learning_rate": 5.6267386519466446e-05,
+ "loss": 0.4175,
+ "step": 108900
+ },
+ {
+ "epoch": 0.5621165921133416,
+ "grad_norm": 19387.88671875,
+ "learning_rate": 5.623114282087664e-05,
+ "loss": 0.4274,
+ "step": 108950
+ },
+ {
+ "epoch": 0.5623745620959545,
+ "grad_norm": 23158.28125,
+ "learning_rate": 5.619489579622343e-05,
+ "loss": 0.4222,
+ "step": 109000
+ },
+ {
+ "epoch": 0.5626325320785673,
+ "grad_norm": 23551.431640625,
+ "learning_rate": 5.6158645464854817e-05,
+ "loss": 0.428,
+ "step": 109050
+ },
+ {
+ "epoch": 0.5628905020611802,
+ "grad_norm": 23904.896484375,
+ "learning_rate": 5.6122391846120495e-05,
+ "loss": 0.4252,
+ "step": 109100
+ },
+ {
+ "epoch": 0.563148472043793,
+ "grad_norm": 21354.61328125,
+ "learning_rate": 5.608613495937197e-05,
+ "loss": 0.4202,
+ "step": 109150
+ },
+ {
+ "epoch": 0.5634064420264058,
+ "grad_norm": 23561.978515625,
+ "learning_rate": 5.6049874823962456e-05,
+ "loss": 0.4301,
+ "step": 109200
+ },
+ {
+ "epoch": 0.5636644120090186,
+ "grad_norm": 20979.53515625,
+ "learning_rate": 5.601361145924692e-05,
+ "loss": 0.4204,
+ "step": 109250
+ },
+ {
+ "epoch": 0.5639223819916315,
+ "grad_norm": 24039.125,
+ "learning_rate": 5.5977344884582e-05,
+ "loss": 0.4284,
+ "step": 109300
+ },
+ {
+ "epoch": 0.5641803519742443,
+ "grad_norm": 22242.35546875,
+ "learning_rate": 5.594107511932615e-05,
+ "loss": 0.4248,
+ "step": 109350
+ },
+ {
+ "epoch": 0.5644383219568571,
+ "grad_norm": 20016.1875,
+ "learning_rate": 5.5904802182839434e-05,
+ "loss": 0.4222,
+ "step": 109400
+ },
+ {
+ "epoch": 0.5646962919394699,
+ "grad_norm": 22243.0703125,
+ "learning_rate": 5.5868526094483666e-05,
+ "loss": 0.4276,
+ "step": 109450
+ },
+ {
+ "epoch": 0.5649542619220828,
+ "grad_norm": 23286.38671875,
+ "learning_rate": 5.58322468736223e-05,
+ "loss": 0.4208,
+ "step": 109500
+ },
+ {
+ "epoch": 0.5652122319046956,
+ "grad_norm": 21801.802734375,
+ "learning_rate": 5.579596453962047e-05,
+ "loss": 0.4275,
+ "step": 109550
+ },
+ {
+ "epoch": 0.5654702018873083,
+ "grad_norm": 23282.025390625,
+ "learning_rate": 5.575967911184502e-05,
+ "loss": 0.4255,
+ "step": 109600
+ },
+ {
+ "epoch": 0.5657281718699212,
+ "grad_norm": 25253.943359375,
+ "learning_rate": 5.572339060966439e-05,
+ "loss": 0.4239,
+ "step": 109650
+ },
+ {
+ "epoch": 0.565986141852534,
+ "grad_norm": 22364.595703125,
+ "learning_rate": 5.5687099052448675e-05,
+ "loss": 0.4255,
+ "step": 109700
+ },
+ {
+ "epoch": 0.5662441118351469,
+ "grad_norm": 23305.46484375,
+ "learning_rate": 5.565080445956961e-05,
+ "loss": 0.4254,
+ "step": 109750
+ },
+ {
+ "epoch": 0.5665020818177597,
+ "grad_norm": 20225.2421875,
+ "learning_rate": 5.561450685040054e-05,
+ "loss": 0.4239,
+ "step": 109800
+ },
+ {
+ "epoch": 0.5667600518003725,
+ "grad_norm": 20221.8203125,
+ "learning_rate": 5.557820624431645e-05,
+ "loss": 0.4171,
+ "step": 109850
+ },
+ {
+ "epoch": 0.5670180217829853,
+ "grad_norm": 19833.607421875,
+ "learning_rate": 5.554190266069387e-05,
+ "loss": 0.4224,
+ "step": 109900
+ },
+ {
+ "epoch": 0.5672759917655982,
+ "grad_norm": 19884.58203125,
+ "learning_rate": 5.550559611891095e-05,
+ "loss": 0.4196,
+ "step": 109950
+ },
+ {
+ "epoch": 0.567533961748211,
+ "grad_norm": 22072.25390625,
+ "learning_rate": 5.546928663834745e-05,
+ "loss": 0.4196,
+ "step": 110000
+ },
+ {
+ "epoch": 0.567533961748211,
+ "eval_loss": 0.4103853106498718,
+ "eval_runtime": 3606.5234,
+ "eval_samples_per_second": 859.864,
+ "eval_steps_per_second": 1.679,
+ "step": 110000
+ },
+ {
+ "epoch": 0.5677919317308238,
+ "grad_norm": 21647.181640625,
+ "learning_rate": 5.543297423838464e-05,
+ "loss": 0.414,
+ "step": 110050
+ },
+ {
+ "epoch": 0.5680499017134366,
+ "grad_norm": 23264.748046875,
+ "learning_rate": 5.5396658938405396e-05,
+ "loss": 0.4192,
+ "step": 110100
+ },
+ {
+ "epoch": 0.5683078716960495,
+ "grad_norm": 21868.10546875,
+ "learning_rate": 5.536034075779409e-05,
+ "loss": 0.4222,
+ "step": 110150
+ },
+ {
+ "epoch": 0.5685658416786623,
+ "grad_norm": 22489.07421875,
+ "learning_rate": 5.53240197159367e-05,
+ "loss": 0.4237,
+ "step": 110200
+ },
+ {
+ "epoch": 0.568823811661275,
+ "grad_norm": 21589.173828125,
+ "learning_rate": 5.5287695832220674e-05,
+ "loss": 0.4218,
+ "step": 110250
+ },
+ {
+ "epoch": 0.5690817816438879,
+ "grad_norm": 23184.103515625,
+ "learning_rate": 5.525136912603501e-05,
+ "loss": 0.4203,
+ "step": 110300
+ },
+ {
+ "epoch": 0.5693397516265007,
+ "grad_norm": 23085.970703125,
+ "learning_rate": 5.521503961677019e-05,
+ "loss": 0.4216,
+ "step": 110350
+ },
+ {
+ "epoch": 0.5695977216091136,
+ "grad_norm": 22217.3671875,
+ "learning_rate": 5.51787073238182e-05,
+ "loss": 0.424,
+ "step": 110400
+ },
+ {
+ "epoch": 0.5698556915917263,
+ "grad_norm": 23515.9375,
+ "learning_rate": 5.514237226657253e-05,
+ "loss": 0.4217,
+ "step": 110450
+ },
+ {
+ "epoch": 0.5701136615743392,
+ "grad_norm": 21375.2734375,
+ "learning_rate": 5.510603446442812e-05,
+ "loss": 0.4175,
+ "step": 110500
+ },
+ {
+ "epoch": 0.570371631556952,
+ "grad_norm": 21658.15625,
+ "learning_rate": 5.506969393678139e-05,
+ "loss": 0.4191,
+ "step": 110550
+ },
+ {
+ "epoch": 0.5706296015395649,
+ "grad_norm": 24653.294921875,
+ "learning_rate": 5.503335070303018e-05,
+ "loss": 0.419,
+ "step": 110600
+ },
+ {
+ "epoch": 0.5708875715221777,
+ "grad_norm": 21722.984375,
+ "learning_rate": 5.4997004782573855e-05,
+ "loss": 0.4237,
+ "step": 110650
+ },
+ {
+ "epoch": 0.5711455415047905,
+ "grad_norm": 20897.595703125,
+ "learning_rate": 5.496065619481312e-05,
+ "loss": 0.4211,
+ "step": 110700
+ },
+ {
+ "epoch": 0.5714035114874033,
+ "grad_norm": 20729.123046875,
+ "learning_rate": 5.4924304959150175e-05,
+ "loss": 0.4228,
+ "step": 110750
+ },
+ {
+ "epoch": 0.5716614814700162,
+ "grad_norm": 22107.888671875,
+ "learning_rate": 5.488795109498861e-05,
+ "loss": 0.4222,
+ "step": 110800
+ },
+ {
+ "epoch": 0.571919451452629,
+ "grad_norm": 24264.587890625,
+ "learning_rate": 5.485159462173337e-05,
+ "loss": 0.4232,
+ "step": 110850
+ },
+ {
+ "epoch": 0.5721774214352418,
+ "grad_norm": 23000.34375,
+ "learning_rate": 5.481523555879089e-05,
+ "loss": 0.4236,
+ "step": 110900
+ },
+ {
+ "epoch": 0.5724353914178546,
+ "grad_norm": 20345.26953125,
+ "learning_rate": 5.4778873925568905e-05,
+ "loss": 0.4254,
+ "step": 110950
+ },
+ {
+ "epoch": 0.5726933614004674,
+ "grad_norm": 25514.09765625,
+ "learning_rate": 5.4742509741476566e-05,
+ "loss": 0.4247,
+ "step": 111000
+ },
+ {
+ "epoch": 0.5729513313830803,
+ "grad_norm": 22510.115234375,
+ "learning_rate": 5.470614302592434e-05,
+ "loss": 0.4271,
+ "step": 111050
+ },
+ {
+ "epoch": 0.573209301365693,
+ "grad_norm": 24683.4921875,
+ "learning_rate": 5.466977379832411e-05,
+ "loss": 0.4207,
+ "step": 111100
+ },
+ {
+ "epoch": 0.5734672713483059,
+ "grad_norm": 22154.2890625,
+ "learning_rate": 5.4633402078089035e-05,
+ "loss": 0.422,
+ "step": 111150
+ },
+ {
+ "epoch": 0.5737252413309187,
+ "grad_norm": 23333.966796875,
+ "learning_rate": 5.459702788463367e-05,
+ "loss": 0.4218,
+ "step": 111200
+ },
+ {
+ "epoch": 0.5739832113135316,
+ "grad_norm": 26566.900390625,
+ "learning_rate": 5.4560651237373814e-05,
+ "loss": 0.4269,
+ "step": 111250
+ },
+ {
+ "epoch": 0.5742411812961444,
+ "grad_norm": 21463.828125,
+ "learning_rate": 5.452427215572666e-05,
+ "loss": 0.4196,
+ "step": 111300
+ },
+ {
+ "epoch": 0.5744991512787572,
+ "grad_norm": 24921.373046875,
+ "learning_rate": 5.448789065911064e-05,
+ "loss": 0.4248,
+ "step": 111350
+ },
+ {
+ "epoch": 0.57475712126137,
+ "grad_norm": 23610.16015625,
+ "learning_rate": 5.445150676694548e-05,
+ "loss": 0.4245,
+ "step": 111400
+ },
+ {
+ "epoch": 0.5750150912439829,
+ "grad_norm": 24598.2109375,
+ "learning_rate": 5.441512049865221e-05,
+ "loss": 0.4199,
+ "step": 111450
+ },
+ {
+ "epoch": 0.5752730612265957,
+ "grad_norm": 24330.02734375,
+ "learning_rate": 5.43787318736531e-05,
+ "loss": 0.423,
+ "step": 111500
+ },
+ {
+ "epoch": 0.5755310312092085,
+ "grad_norm": 23434.587890625,
+ "learning_rate": 5.434234091137171e-05,
+ "loss": 0.4214,
+ "step": 111550
+ },
+ {
+ "epoch": 0.5757890011918213,
+ "grad_norm": 25007.08203125,
+ "learning_rate": 5.430594763123283e-05,
+ "loss": 0.4258,
+ "step": 111600
+ },
+ {
+ "epoch": 0.5760469711744342,
+ "grad_norm": 24568.759765625,
+ "learning_rate": 5.4269552052662486e-05,
+ "loss": 0.4248,
+ "step": 111650
+ },
+ {
+ "epoch": 0.576304941157047,
+ "grad_norm": 22131.74609375,
+ "learning_rate": 5.423315419508792e-05,
+ "loss": 0.418,
+ "step": 111700
+ },
+ {
+ "epoch": 0.5765629111396597,
+ "grad_norm": 22058.443359375,
+ "learning_rate": 5.4196754077937626e-05,
+ "loss": 0.4289,
+ "step": 111750
+ },
+ {
+ "epoch": 0.5768208811222726,
+ "grad_norm": 23790.3203125,
+ "learning_rate": 5.4160351720641276e-05,
+ "loss": 0.4217,
+ "step": 111800
+ },
+ {
+ "epoch": 0.5770788511048854,
+ "grad_norm": 20349.287109375,
+ "learning_rate": 5.412394714262974e-05,
+ "loss": 0.4154,
+ "step": 111850
+ },
+ {
+ "epoch": 0.5773368210874983,
+ "grad_norm": 20262.9296875,
+ "learning_rate": 5.408754036333506e-05,
+ "loss": 0.4214,
+ "step": 111900
+ },
+ {
+ "epoch": 0.5775947910701111,
+ "grad_norm": 21678.17578125,
+ "learning_rate": 5.4051131402190494e-05,
+ "loss": 0.4168,
+ "step": 111950
+ },
+ {
+ "epoch": 0.5778527610527239,
+ "grad_norm": 22649.1875,
+ "learning_rate": 5.4014720278630415e-05,
+ "loss": 0.4206,
+ "step": 112000
+ },
+ {
+ "epoch": 0.5781107310353367,
+ "grad_norm": 22304.1328125,
+ "learning_rate": 5.39783070120904e-05,
+ "loss": 0.4199,
+ "step": 112050
+ },
+ {
+ "epoch": 0.5783687010179496,
+ "grad_norm": 22573.169921875,
+ "learning_rate": 5.394189162200715e-05,
+ "loss": 0.4325,
+ "step": 112100
+ },
+ {
+ "epoch": 0.5786266710005624,
+ "grad_norm": 22942.09765625,
+ "learning_rate": 5.390547412781847e-05,
+ "loss": 0.416,
+ "step": 112150
+ },
+ {
+ "epoch": 0.5788846409831752,
+ "grad_norm": 20210.18359375,
+ "learning_rate": 5.386905454896333e-05,
+ "loss": 0.4274,
+ "step": 112200
+ },
+ {
+ "epoch": 0.579142610965788,
+ "grad_norm": 22916.09375,
+ "learning_rate": 5.38326329048818e-05,
+ "loss": 0.4208,
+ "step": 112250
+ },
+ {
+ "epoch": 0.5794005809484009,
+ "grad_norm": 21563.78125,
+ "learning_rate": 5.379620921501503e-05,
+ "loss": 0.4264,
+ "step": 112300
+ },
+ {
+ "epoch": 0.5796585509310137,
+ "grad_norm": 20984.3671875,
+ "learning_rate": 5.375978349880528e-05,
+ "loss": 0.4229,
+ "step": 112350
+ },
+ {
+ "epoch": 0.5799165209136264,
+ "grad_norm": 22014.6640625,
+ "learning_rate": 5.372335577569592e-05,
+ "loss": 0.4205,
+ "step": 112400
+ },
+ {
+ "epoch": 0.5801744908962393,
+ "grad_norm": 22977.39453125,
+ "learning_rate": 5.3686926065131325e-05,
+ "loss": 0.4248,
+ "step": 112450
+ },
+ {
+ "epoch": 0.5804324608788521,
+ "grad_norm": 22589.521484375,
+ "learning_rate": 5.365049438655702e-05,
+ "loss": 0.4165,
+ "step": 112500
+ },
+ {
+ "epoch": 0.580690430861465,
+ "grad_norm": 24455.625,
+ "learning_rate": 5.3614060759419474e-05,
+ "loss": 0.4224,
+ "step": 112550
+ },
+ {
+ "epoch": 0.5809484008440777,
+ "grad_norm": 24485.833984375,
+ "learning_rate": 5.357762520316628e-05,
+ "loss": 0.4264,
+ "step": 112600
+ },
+ {
+ "epoch": 0.5812063708266906,
+ "grad_norm": 23294.244140625,
+ "learning_rate": 5.354118773724603e-05,
+ "loss": 0.4254,
+ "step": 112650
+ },
+ {
+ "epoch": 0.5814643408093034,
+ "grad_norm": 21813.884765625,
+ "learning_rate": 5.350474838110835e-05,
+ "loss": 0.4226,
+ "step": 112700
+ },
+ {
+ "epoch": 0.5817223107919163,
+ "grad_norm": 23532.0546875,
+ "learning_rate": 5.3468307154203836e-05,
+ "loss": 0.4236,
+ "step": 112750
+ },
+ {
+ "epoch": 0.5819802807745291,
+ "grad_norm": 24739.787109375,
+ "learning_rate": 5.343186407598413e-05,
+ "loss": 0.4276,
+ "step": 112800
+ },
+ {
+ "epoch": 0.5822382507571419,
+ "grad_norm": 23312.783203125,
+ "learning_rate": 5.339541916590184e-05,
+ "loss": 0.4281,
+ "step": 112850
+ },
+ {
+ "epoch": 0.5824962207397547,
+ "grad_norm": 24166.798828125,
+ "learning_rate": 5.335897244341054e-05,
+ "loss": 0.4238,
+ "step": 112900
+ },
+ {
+ "epoch": 0.5827541907223676,
+ "grad_norm": 23690.455078125,
+ "learning_rate": 5.332252392796478e-05,
+ "loss": 0.4181,
+ "step": 112950
+ },
+ {
+ "epoch": 0.5830121607049804,
+ "grad_norm": 23499.16015625,
+ "learning_rate": 5.32860736390201e-05,
+ "loss": 0.4143,
+ "step": 113000
+ },
+ {
+ "epoch": 0.5832701306875931,
+ "grad_norm": 23299.5625,
+ "learning_rate": 5.324962159603294e-05,
+ "loss": 0.4198,
+ "step": 113050
+ },
+ {
+ "epoch": 0.583528100670206,
+ "grad_norm": 22958.423828125,
+ "learning_rate": 5.321316781846071e-05,
+ "loss": 0.421,
+ "step": 113100
+ },
+ {
+ "epoch": 0.5837860706528188,
+ "grad_norm": 20775.119140625,
+ "learning_rate": 5.3176712325761704e-05,
+ "loss": 0.4148,
+ "step": 113150
+ },
+ {
+ "epoch": 0.5840440406354317,
+ "grad_norm": 23139.953125,
+ "learning_rate": 5.3140255137395155e-05,
+ "loss": 0.422,
+ "step": 113200
+ },
+ {
+ "epoch": 0.5843020106180444,
+ "grad_norm": 19829.94140625,
+ "learning_rate": 5.310379627282125e-05,
+ "loss": 0.4248,
+ "step": 113250
+ },
+ {
+ "epoch": 0.5845599806006573,
+ "grad_norm": 20085.572265625,
+ "learning_rate": 5.306733575150099e-05,
+ "loss": 0.4183,
+ "step": 113300
+ },
+ {
+ "epoch": 0.5848179505832701,
+ "grad_norm": 23206.27734375,
+ "learning_rate": 5.303087359289629e-05,
+ "loss": 0.423,
+ "step": 113350
+ },
+ {
+ "epoch": 0.585075920565883,
+ "grad_norm": 25039.34765625,
+ "learning_rate": 5.299440981646996e-05,
+ "loss": 0.4232,
+ "step": 113400
+ },
+ {
+ "epoch": 0.5853338905484958,
+ "grad_norm": 21276.865234375,
+ "learning_rate": 5.2957944441685646e-05,
+ "loss": 0.4205,
+ "step": 113450
+ },
+ {
+ "epoch": 0.5855918605311086,
+ "grad_norm": 22706.712890625,
+ "learning_rate": 5.292147748800788e-05,
+ "loss": 0.4225,
+ "step": 113500
+ },
+ {
+ "epoch": 0.5858498305137214,
+ "grad_norm": 18046.537109375,
+ "learning_rate": 5.2885008974902004e-05,
+ "loss": 0.4195,
+ "step": 113550
+ },
+ {
+ "epoch": 0.5861078004963343,
+ "grad_norm": 22363.5625,
+ "learning_rate": 5.28485389218342e-05,
+ "loss": 0.4149,
+ "step": 113600
+ },
+ {
+ "epoch": 0.5863657704789471,
+ "grad_norm": 24409.609375,
+ "learning_rate": 5.2812067348271466e-05,
+ "loss": 0.4224,
+ "step": 113650
+ },
+ {
+ "epoch": 0.5866237404615599,
+ "grad_norm": 23921.68359375,
+ "learning_rate": 5.277559427368164e-05,
+ "loss": 0.4274,
+ "step": 113700
+ },
+ {
+ "epoch": 0.5868817104441727,
+ "grad_norm": 23887.84765625,
+ "learning_rate": 5.273911971753335e-05,
+ "loss": 0.4185,
+ "step": 113750
+ },
+ {
+ "epoch": 0.5871396804267855,
+ "grad_norm": 23169.423828125,
+ "learning_rate": 5.270264369929597e-05,
+ "loss": 0.4218,
+ "step": 113800
+ },
+ {
+ "epoch": 0.5873976504093984,
+ "grad_norm": 23339.57421875,
+ "learning_rate": 5.266616623843972e-05,
+ "loss": 0.4211,
+ "step": 113850
+ },
+ {
+ "epoch": 0.5876556203920111,
+ "grad_norm": 22072.59765625,
+ "learning_rate": 5.2629687354435576e-05,
+ "loss": 0.4191,
+ "step": 113900
+ },
+ {
+ "epoch": 0.587913590374624,
+ "grad_norm": 24308.357421875,
+ "learning_rate": 5.259320706675523e-05,
+ "loss": 0.4168,
+ "step": 113950
+ },
+ {
+ "epoch": 0.5881715603572368,
+ "grad_norm": 20896.5703125,
+ "learning_rate": 5.255672539487119e-05,
+ "loss": 0.4201,
+ "step": 114000
+ },
+ {
+ "epoch": 0.5884295303398497,
+ "grad_norm": 20070.814453125,
+ "learning_rate": 5.252024235825661e-05,
+ "loss": 0.4216,
+ "step": 114050
+ },
+ {
+ "epoch": 0.5886875003224625,
+ "grad_norm": 24864.869140625,
+ "learning_rate": 5.2483757976385486e-05,
+ "loss": 0.4269,
+ "step": 114100
+ },
+ {
+ "epoch": 0.5889454703050753,
+ "grad_norm": 24734.0234375,
+ "learning_rate": 5.2447272268732436e-05,
+ "loss": 0.4196,
+ "step": 114150
+ },
+ {
+ "epoch": 0.5892034402876881,
+ "grad_norm": 22383.212890625,
+ "learning_rate": 5.2410785254772856e-05,
+ "loss": 0.4171,
+ "step": 114200
+ },
+ {
+ "epoch": 0.589461410270301,
+ "grad_norm": 22587.44921875,
+ "learning_rate": 5.237429695398276e-05,
+ "loss": 0.4232,
+ "step": 114250
+ },
+ {
+ "epoch": 0.5897193802529138,
+ "grad_norm": 23357.263671875,
+ "learning_rate": 5.2337807385838935e-05,
+ "loss": 0.4241,
+ "step": 114300
+ },
+ {
+ "epoch": 0.5899773502355266,
+ "grad_norm": 24632.125,
+ "learning_rate": 5.23013165698188e-05,
+ "loss": 0.4154,
+ "step": 114350
+ },
+ {
+ "epoch": 0.5902353202181394,
+ "grad_norm": 23191.818359375,
+ "learning_rate": 5.226482452540045e-05,
+ "loss": 0.424,
+ "step": 114400
+ },
+ {
+ "epoch": 0.5904932902007523,
+ "grad_norm": 23649.560546875,
+ "learning_rate": 5.2228331272062626e-05,
+ "loss": 0.427,
+ "step": 114450
+ },
+ {
+ "epoch": 0.5907512601833651,
+ "grad_norm": 23992.169921875,
+ "learning_rate": 5.21918368292847e-05,
+ "loss": 0.4267,
+ "step": 114500
+ },
+ {
+ "epoch": 0.5910092301659778,
+ "grad_norm": 21792.041015625,
+ "learning_rate": 5.215534121654673e-05,
+ "loss": 0.4272,
+ "step": 114550
+ },
+ {
+ "epoch": 0.5912672001485907,
+ "grad_norm": 25516.345703125,
+ "learning_rate": 5.211884445332935e-05,
+ "loss": 0.4207,
+ "step": 114600
+ },
+ {
+ "epoch": 0.5915251701312035,
+ "grad_norm": 22557.25390625,
+ "learning_rate": 5.208234655911384e-05,
+ "loss": 0.4228,
+ "step": 114650
+ },
+ {
+ "epoch": 0.5917831401138164,
+ "grad_norm": 24185.09765625,
+ "learning_rate": 5.2045847553382045e-05,
+ "loss": 0.4226,
+ "step": 114700
+ },
+ {
+ "epoch": 0.5920411100964291,
+ "grad_norm": 20565.134765625,
+ "learning_rate": 5.200934745561643e-05,
+ "loss": 0.4274,
+ "step": 114750
+ },
+ {
+ "epoch": 0.592299080079042,
+ "grad_norm": 24019.0,
+ "learning_rate": 5.197284628530007e-05,
+ "loss": 0.4234,
+ "step": 114800
+ },
+ {
+ "epoch": 0.5925570500616548,
+ "grad_norm": 26129.01171875,
+ "learning_rate": 5.193634406191658e-05,
+ "loss": 0.418,
+ "step": 114850
+ },
+ {
+ "epoch": 0.5928150200442677,
+ "grad_norm": 25187.611328125,
+ "learning_rate": 5.18998408049501e-05,
+ "loss": 0.4213,
+ "step": 114900
+ },
+ {
+ "epoch": 0.5930729900268805,
+ "grad_norm": 20145.14453125,
+ "learning_rate": 5.186333653388539e-05,
+ "loss": 0.418,
+ "step": 114950
+ },
+ {
+ "epoch": 0.5933309600094933,
+ "grad_norm": 22472.322265625,
+ "learning_rate": 5.182683126820773e-05,
+ "loss": 0.4209,
+ "step": 115000
+ },
+ {
+ "epoch": 0.5933309600094933,
+ "eval_loss": 0.4084908068180084,
+ "eval_runtime": 3582.6916,
+ "eval_samples_per_second": 865.584,
+ "eval_steps_per_second": 1.691,
+ "step": 115000
+ },
+ {
+ "epoch": 0.5935889299921061,
+ "grad_norm": 22404.052734375,
+ "learning_rate": 5.179032502740291e-05,
+ "loss": 0.4147,
+ "step": 115050
+ },
+ {
+ "epoch": 0.593846899974719,
+ "grad_norm": 21242.705078125,
+ "learning_rate": 5.175381783095725e-05,
+ "loss": 0.4237,
+ "step": 115100
+ },
+ {
+ "epoch": 0.5941048699573318,
+ "grad_norm": 22416.06640625,
+ "learning_rate": 5.171730969835758e-05,
+ "loss": 0.4185,
+ "step": 115150
+ },
+ {
+ "epoch": 0.5943628399399445,
+ "grad_norm": 22231.525390625,
+ "learning_rate": 5.1680800649091276e-05,
+ "loss": 0.4227,
+ "step": 115200
+ },
+ {
+ "epoch": 0.5946208099225574,
+ "grad_norm": 22431.462890625,
+ "learning_rate": 5.164429070264613e-05,
+ "loss": 0.4225,
+ "step": 115250
+ },
+ {
+ "epoch": 0.5948787799051702,
+ "grad_norm": 26008.57421875,
+ "learning_rate": 5.160777987851044e-05,
+ "loss": 0.4253,
+ "step": 115300
+ },
+ {
+ "epoch": 0.5951367498877831,
+ "grad_norm": 22555.501953125,
+ "learning_rate": 5.157126819617297e-05,
+ "loss": 0.4181,
+ "step": 115350
+ },
+ {
+ "epoch": 0.5953947198703958,
+ "grad_norm": 25113.587890625,
+ "learning_rate": 5.153475567512298e-05,
+ "loss": 0.4261,
+ "step": 115400
+ },
+ {
+ "epoch": 0.5956526898530087,
+ "grad_norm": 22877.908203125,
+ "learning_rate": 5.149824233485013e-05,
+ "loss": 0.4177,
+ "step": 115450
+ },
+ {
+ "epoch": 0.5959106598356215,
+ "grad_norm": 22468.34375,
+ "learning_rate": 5.1461728194844526e-05,
+ "loss": 0.4223,
+ "step": 115500
+ },
+ {
+ "epoch": 0.5961686298182344,
+ "grad_norm": 24525.326171875,
+ "learning_rate": 5.142521327459672e-05,
+ "loss": 0.4159,
+ "step": 115550
+ },
+ {
+ "epoch": 0.5964265998008472,
+ "grad_norm": 23334.296875,
+ "learning_rate": 5.1388697593597643e-05,
+ "loss": 0.4206,
+ "step": 115600
+ },
+ {
+ "epoch": 0.59668456978346,
+ "grad_norm": 21743.333984375,
+ "learning_rate": 5.1352181171338706e-05,
+ "loss": 0.4191,
+ "step": 115650
+ },
+ {
+ "epoch": 0.5969425397660728,
+ "grad_norm": 26287.66796875,
+ "learning_rate": 5.131566402731165e-05,
+ "loss": 0.4147,
+ "step": 115700
+ },
+ {
+ "epoch": 0.5972005097486857,
+ "grad_norm": 20856.890625,
+ "learning_rate": 5.12791461810086e-05,
+ "loss": 0.4248,
+ "step": 115750
+ },
+ {
+ "epoch": 0.5974584797312985,
+ "grad_norm": 22821.73046875,
+ "learning_rate": 5.124262765192208e-05,
+ "loss": 0.4239,
+ "step": 115800
+ },
+ {
+ "epoch": 0.5977164497139112,
+ "grad_norm": 24805.427734375,
+ "learning_rate": 5.1206108459545e-05,
+ "loss": 0.4172,
+ "step": 115850
+ },
+ {
+ "epoch": 0.5979744196965241,
+ "grad_norm": 25195.064453125,
+ "learning_rate": 5.116958862337057e-05,
+ "loss": 0.4242,
+ "step": 115900
+ },
+ {
+ "epoch": 0.5982323896791369,
+ "grad_norm": 22029.236328125,
+ "learning_rate": 5.1133068162892383e-05,
+ "loss": 0.4217,
+ "step": 115950
+ },
+ {
+ "epoch": 0.5984903596617498,
+ "grad_norm": 23296.77734375,
+ "learning_rate": 5.109654709760434e-05,
+ "loss": 0.4223,
+ "step": 116000
+ },
+ {
+ "epoch": 0.5987483296443625,
+ "grad_norm": 23822.447265625,
+ "learning_rate": 5.106002544700069e-05,
+ "loss": 0.4235,
+ "step": 116050
+ },
+ {
+ "epoch": 0.5990062996269754,
+ "grad_norm": 21188.46875,
+ "learning_rate": 5.1023503230576e-05,
+ "loss": 0.4275,
+ "step": 116100
+ },
+ {
+ "epoch": 0.5992642696095882,
+ "grad_norm": 24459.021484375,
+ "learning_rate": 5.0986980467825096e-05,
+ "loss": 0.4217,
+ "step": 116150
+ },
+ {
+ "epoch": 0.5995222395922011,
+ "grad_norm": 22304.396484375,
+ "learning_rate": 5.095045717824313e-05,
+ "loss": 0.42,
+ "step": 116200
+ },
+ {
+ "epoch": 0.5997802095748139,
+ "grad_norm": 20124.943359375,
+ "learning_rate": 5.0913933381325516e-05,
+ "loss": 0.4149,
+ "step": 116250
+ },
+ {
+ "epoch": 0.6000381795574267,
+ "grad_norm": 22610.3046875,
+ "learning_rate": 5.087740909656798e-05,
+ "loss": 0.4164,
+ "step": 116300
+ },
+ {
+ "epoch": 0.6002961495400395,
+ "grad_norm": 22058.974609375,
+ "learning_rate": 5.084088434346645e-05,
+ "loss": 0.4211,
+ "step": 116350
+ },
+ {
+ "epoch": 0.6005541195226524,
+ "grad_norm": 23463.626953125,
+ "learning_rate": 5.0804359141517134e-05,
+ "loss": 0.4182,
+ "step": 116400
+ },
+ {
+ "epoch": 0.6008120895052652,
+ "grad_norm": 25045.67578125,
+ "learning_rate": 5.076783351021648e-05,
+ "loss": 0.4202,
+ "step": 116450
+ },
+ {
+ "epoch": 0.601070059487878,
+ "grad_norm": 22583.076171875,
+ "learning_rate": 5.0731307469061184e-05,
+ "loss": 0.4177,
+ "step": 116500
+ },
+ {
+ "epoch": 0.6013280294704908,
+ "grad_norm": 26350.400390625,
+ "learning_rate": 5.069478103754811e-05,
+ "loss": 0.4193,
+ "step": 116550
+ },
+ {
+ "epoch": 0.6015859994531036,
+ "grad_norm": 21430.255859375,
+ "learning_rate": 5.0658254235174385e-05,
+ "loss": 0.422,
+ "step": 116600
+ },
+ {
+ "epoch": 0.6018439694357165,
+ "grad_norm": 21467.482421875,
+ "learning_rate": 5.0621727081437275e-05,
+ "loss": 0.4157,
+ "step": 116650
+ },
+ {
+ "epoch": 0.6021019394183292,
+ "grad_norm": 25780.095703125,
+ "learning_rate": 5.05851995958343e-05,
+ "loss": 0.4243,
+ "step": 116700
+ },
+ {
+ "epoch": 0.6023599094009421,
+ "grad_norm": 21074.52734375,
+ "learning_rate": 5.0548671797863125e-05,
+ "loss": 0.4271,
+ "step": 116750
+ },
+ {
+ "epoch": 0.6026178793835549,
+ "grad_norm": 25752.841796875,
+ "learning_rate": 5.051214370702155e-05,
+ "loss": 0.4209,
+ "step": 116800
+ },
+ {
+ "epoch": 0.6028758493661678,
+ "grad_norm": 23178.93359375,
+ "learning_rate": 5.047561534280758e-05,
+ "loss": 0.4232,
+ "step": 116850
+ },
+ {
+ "epoch": 0.6031338193487806,
+ "grad_norm": 23263.65625,
+ "learning_rate": 5.0439086724719355e-05,
+ "loss": 0.4196,
+ "step": 116900
+ },
+ {
+ "epoch": 0.6033917893313934,
+ "grad_norm": 20372.861328125,
+ "learning_rate": 5.040255787225514e-05,
+ "loss": 0.4194,
+ "step": 116950
+ },
+ {
+ "epoch": 0.6036497593140062,
+ "grad_norm": 23453.59375,
+ "learning_rate": 5.036602880491332e-05,
+ "loss": 0.4156,
+ "step": 117000
+ },
+ {
+ "epoch": 0.6039077292966191,
+ "grad_norm": 24039.392578125,
+ "learning_rate": 5.032949954219243e-05,
+ "loss": 0.4192,
+ "step": 117050
+ },
+ {
+ "epoch": 0.6041656992792319,
+ "grad_norm": 24246.55859375,
+ "learning_rate": 5.0292970103591044e-05,
+ "loss": 0.426,
+ "step": 117100
+ },
+ {
+ "epoch": 0.6044236692618447,
+ "grad_norm": 23403.130859375,
+ "learning_rate": 5.0256440508607904e-05,
+ "loss": 0.4195,
+ "step": 117150
+ },
+ {
+ "epoch": 0.6046816392444575,
+ "grad_norm": 21872.07421875,
+ "learning_rate": 5.021991077674179e-05,
+ "loss": 0.4214,
+ "step": 117200
+ },
+ {
+ "epoch": 0.6049396092270704,
+ "grad_norm": 22344.455078125,
+ "learning_rate": 5.018338092749155e-05,
+ "loss": 0.4205,
+ "step": 117250
+ },
+ {
+ "epoch": 0.6051975792096832,
+ "grad_norm": 22999.099609375,
+ "learning_rate": 5.014685098035612e-05,
+ "loss": 0.4203,
+ "step": 117300
+ },
+ {
+ "epoch": 0.6054555491922959,
+ "grad_norm": 21572.994140625,
+ "learning_rate": 5.011032095483448e-05,
+ "loss": 0.4279,
+ "step": 117350
+ },
+ {
+ "epoch": 0.6057135191749088,
+ "grad_norm": 21263.11328125,
+ "learning_rate": 5.007379087042566e-05,
+ "loss": 0.418,
+ "step": 117400
+ },
+ {
+ "epoch": 0.6059714891575216,
+ "grad_norm": 22789.671875,
+ "learning_rate": 5.00372607466287e-05,
+ "loss": 0.4196,
+ "step": 117450
+ },
+ {
+ "epoch": 0.6062294591401345,
+ "grad_norm": 21276.09765625,
+ "learning_rate": 5.000073060294267e-05,
+ "loss": 0.4125,
+ "step": 117500
+ },
+ {
+ "epoch": 0.6064874291227472,
+ "grad_norm": 22501.169921875,
+ "learning_rate": 4.9964200458866654e-05,
+ "loss": 0.4152,
+ "step": 117550
+ },
+ {
+ "epoch": 0.6067453991053601,
+ "grad_norm": 21645.912109375,
+ "learning_rate": 4.992767033389976e-05,
+ "loss": 0.4253,
+ "step": 117600
+ },
+ {
+ "epoch": 0.6070033690879729,
+ "grad_norm": 21256.7109375,
+ "learning_rate": 4.9891140247541025e-05,
+ "loss": 0.4214,
+ "step": 117650
+ },
+ {
+ "epoch": 0.6072613390705858,
+ "grad_norm": 22883.98046875,
+ "learning_rate": 4.985461021928952e-05,
+ "loss": 0.4238,
+ "step": 117700
+ },
+ {
+ "epoch": 0.6075193090531986,
+ "grad_norm": 21366.412109375,
+ "learning_rate": 4.981808026864426e-05,
+ "loss": 0.4225,
+ "step": 117750
+ },
+ {
+ "epoch": 0.6077772790358114,
+ "grad_norm": 24185.3515625,
+ "learning_rate": 4.978155041510425e-05,
+ "loss": 0.4196,
+ "step": 117800
+ },
+ {
+ "epoch": 0.6080352490184242,
+ "grad_norm": 21638.009765625,
+ "learning_rate": 4.974502067816838e-05,
+ "loss": 0.4221,
+ "step": 117850
+ },
+ {
+ "epoch": 0.608293219001037,
+ "grad_norm": 20867.111328125,
+ "learning_rate": 4.970849107733554e-05,
+ "loss": 0.4225,
+ "step": 117900
+ },
+ {
+ "epoch": 0.6085511889836499,
+ "grad_norm": 21785.69140625,
+ "learning_rate": 4.967196163210451e-05,
+ "loss": 0.4166,
+ "step": 117950
+ },
+ {
+ "epoch": 0.6088091589662626,
+ "grad_norm": 24691.8515625,
+ "learning_rate": 4.963543236197401e-05,
+ "loss": 0.4226,
+ "step": 118000
+ },
+ {
+ "epoch": 0.6090671289488755,
+ "grad_norm": 21214.1484375,
+ "learning_rate": 4.9598903286442675e-05,
+ "loss": 0.418,
+ "step": 118050
+ },
+ {
+ "epoch": 0.6093250989314883,
+ "grad_norm": 22802.849609375,
+ "learning_rate": 4.956237442500898e-05,
+ "loss": 0.4227,
+ "step": 118100
+ },
+ {
+ "epoch": 0.6095830689141012,
+ "grad_norm": 25204.90625,
+ "learning_rate": 4.952584579717135e-05,
+ "loss": 0.4152,
+ "step": 118150
+ },
+ {
+ "epoch": 0.6098410388967139,
+ "grad_norm": 21970.19140625,
+ "learning_rate": 4.9489317422428044e-05,
+ "loss": 0.4197,
+ "step": 118200
+ },
+ {
+ "epoch": 0.6100990088793268,
+ "grad_norm": 22331.013671875,
+ "learning_rate": 4.945278932027723e-05,
+ "loss": 0.4161,
+ "step": 118250
+ },
+ {
+ "epoch": 0.6103569788619396,
+ "grad_norm": 27234.177734375,
+ "learning_rate": 4.941626151021686e-05,
+ "loss": 0.4204,
+ "step": 118300
+ },
+ {
+ "epoch": 0.6106149488445525,
+ "grad_norm": 22253.0078125,
+ "learning_rate": 4.937973401174481e-05,
+ "loss": 0.4202,
+ "step": 118350
+ },
+ {
+ "epoch": 0.6108729188271653,
+ "grad_norm": 20930.27734375,
+ "learning_rate": 4.934320684435871e-05,
+ "loss": 0.4169,
+ "step": 118400
+ },
+ {
+ "epoch": 0.6111308888097781,
+ "grad_norm": 22569.205078125,
+ "learning_rate": 4.930668002755609e-05,
+ "loss": 0.4177,
+ "step": 118450
+ },
+ {
+ "epoch": 0.6113888587923909,
+ "grad_norm": 23197.943359375,
+ "learning_rate": 4.9270153580834256e-05,
+ "loss": 0.414,
+ "step": 118500
+ },
+ {
+ "epoch": 0.6116468287750038,
+ "grad_norm": 21927.1875,
+ "learning_rate": 4.923362752369029e-05,
+ "loss": 0.4203,
+ "step": 118550
+ },
+ {
+ "epoch": 0.6119047987576166,
+ "grad_norm": 23422.181640625,
+ "learning_rate": 4.919710187562112e-05,
+ "loss": 0.4213,
+ "step": 118600
+ },
+ {
+ "epoch": 0.6121627687402293,
+ "grad_norm": 23351.67578125,
+ "learning_rate": 4.9160576656123416e-05,
+ "loss": 0.4213,
+ "step": 118650
+ },
+ {
+ "epoch": 0.6124207387228422,
+ "grad_norm": 21228.416015625,
+ "learning_rate": 4.9124051884693664e-05,
+ "loss": 0.4192,
+ "step": 118700
+ },
+ {
+ "epoch": 0.612678708705455,
+ "grad_norm": 22555.9609375,
+ "learning_rate": 4.908752758082802e-05,
+ "loss": 0.4189,
+ "step": 118750
+ },
+ {
+ "epoch": 0.6129366786880679,
+ "grad_norm": 21010.859375,
+ "learning_rate": 4.905100376402251e-05,
+ "loss": 0.4194,
+ "step": 118800
+ },
+ {
+ "epoch": 0.6131946486706806,
+ "grad_norm": 23468.78515625,
+ "learning_rate": 4.901448045377279e-05,
+ "loss": 0.4151,
+ "step": 118850
+ },
+ {
+ "epoch": 0.6134526186532935,
+ "grad_norm": 23818.189453125,
+ "learning_rate": 4.8977957669574334e-05,
+ "loss": 0.4184,
+ "step": 118900
+ },
+ {
+ "epoch": 0.6137105886359063,
+ "grad_norm": 22162.76171875,
+ "learning_rate": 4.8941435430922294e-05,
+ "loss": 0.4181,
+ "step": 118950
+ },
+ {
+ "epoch": 0.6139685586185192,
+ "grad_norm": 22983.45703125,
+ "learning_rate": 4.8904913757311506e-05,
+ "loss": 0.4196,
+ "step": 119000
+ },
+ {
+ "epoch": 0.614226528601132,
+ "grad_norm": 22748.150390625,
+ "learning_rate": 4.886839266823656e-05,
+ "loss": 0.4195,
+ "step": 119050
+ },
+ {
+ "epoch": 0.6144844985837448,
+ "grad_norm": 23146.306640625,
+ "learning_rate": 4.8831872183191684e-05,
+ "loss": 0.4219,
+ "step": 119100
+ },
+ {
+ "epoch": 0.6147424685663576,
+ "grad_norm": 24951.591796875,
+ "learning_rate": 4.879535232167084e-05,
+ "loss": 0.4165,
+ "step": 119150
+ },
+ {
+ "epoch": 0.6150004385489705,
+ "grad_norm": 24381.689453125,
+ "learning_rate": 4.875883310316758e-05,
+ "loss": 0.4179,
+ "step": 119200
+ },
+ {
+ "epoch": 0.6152584085315833,
+ "grad_norm": 21191.4609375,
+ "learning_rate": 4.872231454717518e-05,
+ "loss": 0.4155,
+ "step": 119250
+ },
+ {
+ "epoch": 0.615516378514196,
+ "grad_norm": 21586.84375,
+ "learning_rate": 4.8685796673186526e-05,
+ "loss": 0.412,
+ "step": 119300
+ },
+ {
+ "epoch": 0.6157743484968089,
+ "grad_norm": 20381.505859375,
+ "learning_rate": 4.864927950069416e-05,
+ "loss": 0.4171,
+ "step": 119350
+ },
+ {
+ "epoch": 0.6160323184794217,
+ "grad_norm": 23258.296875,
+ "learning_rate": 4.861276304919026e-05,
+ "loss": 0.418,
+ "step": 119400
+ },
+ {
+ "epoch": 0.6162902884620346,
+ "grad_norm": 23629.14453125,
+ "learning_rate": 4.857624733816657e-05,
+ "loss": 0.4221,
+ "step": 119450
+ },
+ {
+ "epoch": 0.6165482584446473,
+ "grad_norm": 22892.7734375,
+ "learning_rate": 4.853973238711449e-05,
+ "loss": 0.4278,
+ "step": 119500
+ },
+ {
+ "epoch": 0.6168062284272602,
+ "grad_norm": 21639.669921875,
+ "learning_rate": 4.850321821552497e-05,
+ "loss": 0.4224,
+ "step": 119550
+ },
+ {
+ "epoch": 0.617064198409873,
+ "grad_norm": 21392.951171875,
+ "learning_rate": 4.84667048428886e-05,
+ "loss": 0.4192,
+ "step": 119600
+ },
+ {
+ "epoch": 0.6173221683924859,
+ "grad_norm": 22603.51953125,
+ "learning_rate": 4.843019228869548e-05,
+ "loss": 0.4169,
+ "step": 119650
+ },
+ {
+ "epoch": 0.6175801383750986,
+ "grad_norm": 22470.62109375,
+ "learning_rate": 4.8393680572435324e-05,
+ "loss": 0.4175,
+ "step": 119700
+ },
+ {
+ "epoch": 0.6178381083577115,
+ "grad_norm": 26185.634765625,
+ "learning_rate": 4.835716971359737e-05,
+ "loss": 0.4208,
+ "step": 119750
+ },
+ {
+ "epoch": 0.6180960783403243,
+ "grad_norm": 21508.12109375,
+ "learning_rate": 4.832065973167041e-05,
+ "loss": 0.4194,
+ "step": 119800
+ },
+ {
+ "epoch": 0.6183540483229372,
+ "grad_norm": 20717.205078125,
+ "learning_rate": 4.8284150646142784e-05,
+ "loss": 0.424,
+ "step": 119850
+ },
+ {
+ "epoch": 0.61861201830555,
+ "grad_norm": 20015.30078125,
+ "learning_rate": 4.8247642476502284e-05,
+ "loss": 0.4189,
+ "step": 119900
+ },
+ {
+ "epoch": 0.6188699882881628,
+ "grad_norm": 21596.349609375,
+ "learning_rate": 4.821113524223634e-05,
+ "loss": 0.4218,
+ "step": 119950
+ },
+ {
+ "epoch": 0.6191279582707756,
+ "grad_norm": 22051.921875,
+ "learning_rate": 4.817462896283173e-05,
+ "loss": 0.4184,
+ "step": 120000
+ },
+ {
+ "epoch": 0.6191279582707756,
+ "eval_loss": 0.40621376037597656,
+ "eval_runtime": 3588.5932,
+ "eval_samples_per_second": 864.16,
+ "eval_steps_per_second": 1.688,
+ "step": 120000
+ },
+ {
+ "epoch": 0.6193859282533885,
+ "grad_norm": 22562.478515625,
+ "learning_rate": 4.813812365777486e-05,
+ "loss": 0.4171,
+ "step": 120050
+ },
+ {
+ "epoch": 0.6196438982360013,
+ "grad_norm": 22531.505859375,
+ "learning_rate": 4.81016193465515e-05,
+ "loss": 0.4171,
+ "step": 120100
+ },
+ {
+ "epoch": 0.619901868218614,
+ "grad_norm": 21869.177734375,
+ "learning_rate": 4.8065116048647005e-05,
+ "loss": 0.4184,
+ "step": 120150
+ },
+ {
+ "epoch": 0.6201598382012269,
+ "grad_norm": 23087.56640625,
+ "learning_rate": 4.802861378354607e-05,
+ "loss": 0.4177,
+ "step": 120200
+ },
+ {
+ "epoch": 0.6204178081838397,
+ "grad_norm": 22546.060546875,
+ "learning_rate": 4.7992112570732925e-05,
+ "loss": 0.4213,
+ "step": 120250
+ },
+ {
+ "epoch": 0.6206757781664526,
+ "grad_norm": 22802.8984375,
+ "learning_rate": 4.795561242969122e-05,
+ "loss": 0.4218,
+ "step": 120300
+ },
+ {
+ "epoch": 0.6209337481490653,
+ "grad_norm": 19467.32421875,
+ "learning_rate": 4.791911337990401e-05,
+ "loss": 0.4141,
+ "step": 120350
+ },
+ {
+ "epoch": 0.6211917181316782,
+ "grad_norm": 25076.169921875,
+ "learning_rate": 4.78826154408538e-05,
+ "loss": 0.4178,
+ "step": 120400
+ },
+ {
+ "epoch": 0.621449688114291,
+ "grad_norm": 20815.935546875,
+ "learning_rate": 4.784611863202244e-05,
+ "loss": 0.4217,
+ "step": 120450
+ },
+ {
+ "epoch": 0.6217076580969039,
+ "grad_norm": 21686.271484375,
+ "learning_rate": 4.780962297289126e-05,
+ "loss": 0.4124,
+ "step": 120500
+ },
+ {
+ "epoch": 0.6219656280795167,
+ "grad_norm": 22759.310546875,
+ "learning_rate": 4.777312848294092e-05,
+ "loss": 0.4159,
+ "step": 120550
+ },
+ {
+ "epoch": 0.6222235980621295,
+ "grad_norm": 25325.75390625,
+ "learning_rate": 4.773663518165148e-05,
+ "loss": 0.4176,
+ "step": 120600
+ },
+ {
+ "epoch": 0.6224815680447423,
+ "grad_norm": 23474.958984375,
+ "learning_rate": 4.7700143088502344e-05,
+ "loss": 0.4143,
+ "step": 120650
+ },
+ {
+ "epoch": 0.6227395380273552,
+ "grad_norm": 25355.40625,
+ "learning_rate": 4.766365222297229e-05,
+ "loss": 0.4262,
+ "step": 120700
+ },
+ {
+ "epoch": 0.622997508009968,
+ "grad_norm": 22215.14453125,
+ "learning_rate": 4.762716260453945e-05,
+ "loss": 0.4149,
+ "step": 120750
+ },
+ {
+ "epoch": 0.6232554779925807,
+ "grad_norm": 23521.607421875,
+ "learning_rate": 4.759067425268126e-05,
+ "loss": 0.4223,
+ "step": 120800
+ },
+ {
+ "epoch": 0.6235134479751936,
+ "grad_norm": 24524.02734375,
+ "learning_rate": 4.7554187186874513e-05,
+ "loss": 0.4256,
+ "step": 120850
+ },
+ {
+ "epoch": 0.6237714179578064,
+ "grad_norm": 19954.322265625,
+ "learning_rate": 4.7517701426595266e-05,
+ "loss": 0.4119,
+ "step": 120900
+ },
+ {
+ "epoch": 0.6240293879404193,
+ "grad_norm": 21612.1953125,
+ "learning_rate": 4.748121699131893e-05,
+ "loss": 0.4196,
+ "step": 120950
+ },
+ {
+ "epoch": 0.624287357923032,
+ "grad_norm": 20466.0,
+ "learning_rate": 4.744473390052019e-05,
+ "loss": 0.4181,
+ "step": 121000
+ },
+ {
+ "epoch": 0.6245453279056449,
+ "grad_norm": 19992.173828125,
+ "learning_rate": 4.740825217367304e-05,
+ "loss": 0.4159,
+ "step": 121050
+ },
+ {
+ "epoch": 0.6248032978882577,
+ "grad_norm": 21553.1328125,
+ "learning_rate": 4.737177183025067e-05,
+ "loss": 0.4157,
+ "step": 121100
+ },
+ {
+ "epoch": 0.6250612678708706,
+ "grad_norm": 22242.568359375,
+ "learning_rate": 4.73352928897256e-05,
+ "loss": 0.4153,
+ "step": 121150
+ },
+ {
+ "epoch": 0.6253192378534834,
+ "grad_norm": 23883.212890625,
+ "learning_rate": 4.7298815371569606e-05,
+ "loss": 0.4173,
+ "step": 121200
+ },
+ {
+ "epoch": 0.6255772078360962,
+ "grad_norm": 22386.505859375,
+ "learning_rate": 4.7262339295253645e-05,
+ "loss": 0.4178,
+ "step": 121250
+ },
+ {
+ "epoch": 0.625835177818709,
+ "grad_norm": 22051.859375,
+ "learning_rate": 4.722586468024797e-05,
+ "loss": 0.4111,
+ "step": 121300
+ },
+ {
+ "epoch": 0.6260931478013219,
+ "grad_norm": 21374.4765625,
+ "learning_rate": 4.7189391546021996e-05,
+ "loss": 0.418,
+ "step": 121350
+ },
+ {
+ "epoch": 0.6263511177839347,
+ "grad_norm": 22240.453125,
+ "learning_rate": 4.7152919912044406e-05,
+ "loss": 0.4196,
+ "step": 121400
+ },
+ {
+ "epoch": 0.6266090877665474,
+ "grad_norm": 26278.798828125,
+ "learning_rate": 4.711644979778302e-05,
+ "loss": 0.4165,
+ "step": 121450
+ },
+ {
+ "epoch": 0.6268670577491603,
+ "grad_norm": 22151.77734375,
+ "learning_rate": 4.707998122270492e-05,
+ "loss": 0.422,
+ "step": 121500
+ },
+ {
+ "epoch": 0.6271250277317731,
+ "grad_norm": 21278.99609375,
+ "learning_rate": 4.7043514206276276e-05,
+ "loss": 0.4202,
+ "step": 121550
+ },
+ {
+ "epoch": 0.627382997714386,
+ "grad_norm": 24062.6015625,
+ "learning_rate": 4.70070487679625e-05,
+ "loss": 0.4174,
+ "step": 121600
+ },
+ {
+ "epoch": 0.6276409676969987,
+ "grad_norm": 21124.400390625,
+ "learning_rate": 4.697058492722815e-05,
+ "loss": 0.4156,
+ "step": 121650
+ },
+ {
+ "epoch": 0.6278989376796116,
+ "grad_norm": 22513.48046875,
+ "learning_rate": 4.6934122703536894e-05,
+ "loss": 0.4198,
+ "step": 121700
+ },
+ {
+ "epoch": 0.6281569076622244,
+ "grad_norm": 24250.720703125,
+ "learning_rate": 4.689766211635159e-05,
+ "loss": 0.4197,
+ "step": 121750
+ },
+ {
+ "epoch": 0.6284148776448373,
+ "grad_norm": 23831.220703125,
+ "learning_rate": 4.686120318513415e-05,
+ "loss": 0.415,
+ "step": 121800
+ },
+ {
+ "epoch": 0.62867284762745,
+ "grad_norm": 24005.458984375,
+ "learning_rate": 4.682474592934569e-05,
+ "loss": 0.4154,
+ "step": 121850
+ },
+ {
+ "epoch": 0.6289308176100629,
+ "grad_norm": 21365.09375,
+ "learning_rate": 4.6788290368446355e-05,
+ "loss": 0.4164,
+ "step": 121900
+ },
+ {
+ "epoch": 0.6291887875926757,
+ "grad_norm": 23601.689453125,
+ "learning_rate": 4.675183652189545e-05,
+ "loss": 0.418,
+ "step": 121950
+ },
+ {
+ "epoch": 0.6294467575752886,
+ "grad_norm": 21023.33203125,
+ "learning_rate": 4.671538440915129e-05,
+ "loss": 0.4181,
+ "step": 122000
+ },
+ {
+ "epoch": 0.6297047275579014,
+ "grad_norm": 22292.671875,
+ "learning_rate": 4.667893404967133e-05,
+ "loss": 0.4203,
+ "step": 122050
+ },
+ {
+ "epoch": 0.6299626975405141,
+ "grad_norm": 21975.3671875,
+ "learning_rate": 4.664248546291207e-05,
+ "loss": 0.4162,
+ "step": 122100
+ },
+ {
+ "epoch": 0.630220667523127,
+ "grad_norm": 22591.34765625,
+ "learning_rate": 4.660603866832906e-05,
+ "loss": 0.4146,
+ "step": 122150
+ },
+ {
+ "epoch": 0.6304786375057398,
+ "grad_norm": 23449.529296875,
+ "learning_rate": 4.6569593685376895e-05,
+ "loss": 0.4205,
+ "step": 122200
+ },
+ {
+ "epoch": 0.6307366074883527,
+ "grad_norm": 21614.046875,
+ "learning_rate": 4.653315053350918e-05,
+ "loss": 0.4173,
+ "step": 122250
+ },
+ {
+ "epoch": 0.6309945774709654,
+ "grad_norm": 26004.5859375,
+ "learning_rate": 4.649670923217859e-05,
+ "loss": 0.4137,
+ "step": 122300
+ },
+ {
+ "epoch": 0.6312525474535783,
+ "grad_norm": 23640.357421875,
+ "learning_rate": 4.646026980083676e-05,
+ "loss": 0.4165,
+ "step": 122350
+ },
+ {
+ "epoch": 0.6315105174361911,
+ "grad_norm": 23575.3984375,
+ "learning_rate": 4.6423832258934396e-05,
+ "loss": 0.4179,
+ "step": 122400
+ },
+ {
+ "epoch": 0.631768487418804,
+ "grad_norm": 26795.05078125,
+ "learning_rate": 4.63873966259211e-05,
+ "loss": 0.4171,
+ "step": 122450
+ },
+ {
+ "epoch": 0.6320264574014167,
+ "grad_norm": 22246.931640625,
+ "learning_rate": 4.6350962921245515e-05,
+ "loss": 0.4188,
+ "step": 122500
+ },
+ {
+ "epoch": 0.6322844273840296,
+ "grad_norm": 22268.3359375,
+ "learning_rate": 4.63145311643553e-05,
+ "loss": 0.4141,
+ "step": 122550
+ },
+ {
+ "epoch": 0.6325423973666424,
+ "grad_norm": 23749.38671875,
+ "learning_rate": 4.627810137469696e-05,
+ "loss": 0.4132,
+ "step": 122600
+ },
+ {
+ "epoch": 0.6328003673492553,
+ "grad_norm": 22449.15625,
+ "learning_rate": 4.624167357171606e-05,
+ "loss": 0.4177,
+ "step": 122650
+ },
+ {
+ "epoch": 0.6330583373318681,
+ "grad_norm": 22132.927734375,
+ "learning_rate": 4.6205247774857e-05,
+ "loss": 0.4211,
+ "step": 122700
+ },
+ {
+ "epoch": 0.6333163073144809,
+ "grad_norm": 20199.654296875,
+ "learning_rate": 4.616882400356323e-05,
+ "loss": 0.4127,
+ "step": 122750
+ },
+ {
+ "epoch": 0.6335742772970937,
+ "grad_norm": 23172.29296875,
+ "learning_rate": 4.613240227727699e-05,
+ "loss": 0.4173,
+ "step": 122800
+ },
+ {
+ "epoch": 0.6338322472797066,
+ "grad_norm": 23373.6640625,
+ "learning_rate": 4.609598261543954e-05,
+ "loss": 0.4139,
+ "step": 122850
+ },
+ {
+ "epoch": 0.6340902172623194,
+ "grad_norm": 22187.794921875,
+ "learning_rate": 4.6059565037490965e-05,
+ "loss": 0.4233,
+ "step": 122900
+ },
+ {
+ "epoch": 0.6343481872449321,
+ "grad_norm": 21762.28515625,
+ "learning_rate": 4.602314956287027e-05,
+ "loss": 0.4195,
+ "step": 122950
+ },
+ {
+ "epoch": 0.634606157227545,
+ "grad_norm": 24228.3125,
+ "learning_rate": 4.598673621101535e-05,
+ "loss": 0.4218,
+ "step": 123000
+ },
+ {
+ "epoch": 0.6348641272101578,
+ "grad_norm": 20360.208984375,
+ "learning_rate": 4.595032500136291e-05,
+ "loss": 0.4266,
+ "step": 123050
+ },
+ {
+ "epoch": 0.6351220971927707,
+ "grad_norm": 22763.712890625,
+ "learning_rate": 4.5913915953348574e-05,
+ "loss": 0.4153,
+ "step": 123100
+ },
+ {
+ "epoch": 0.6353800671753834,
+ "grad_norm": 25601.05078125,
+ "learning_rate": 4.5877509086406766e-05,
+ "loss": 0.4201,
+ "step": 123150
+ },
+ {
+ "epoch": 0.6356380371579963,
+ "grad_norm": 22695.91015625,
+ "learning_rate": 4.584110441997081e-05,
+ "loss": 0.4174,
+ "step": 123200
+ },
+ {
+ "epoch": 0.6358960071406091,
+ "grad_norm": 24915.857421875,
+ "learning_rate": 4.5804701973472755e-05,
+ "loss": 0.416,
+ "step": 123250
+ },
+ {
+ "epoch": 0.636153977123222,
+ "grad_norm": 24066.427734375,
+ "learning_rate": 4.576830176634356e-05,
+ "loss": 0.415,
+ "step": 123300
+ },
+ {
+ "epoch": 0.6364119471058348,
+ "grad_norm": 25726.71484375,
+ "learning_rate": 4.573190381801293e-05,
+ "loss": 0.4204,
+ "step": 123350
+ },
+ {
+ "epoch": 0.6366699170884476,
+ "grad_norm": 24271.998046875,
+ "learning_rate": 4.56955081479094e-05,
+ "loss": 0.4166,
+ "step": 123400
+ },
+ {
+ "epoch": 0.6369278870710604,
+ "grad_norm": 20897.818359375,
+ "learning_rate": 4.5659114775460286e-05,
+ "loss": 0.4156,
+ "step": 123450
+ },
+ {
+ "epoch": 0.6371858570536733,
+ "grad_norm": 24409.841796875,
+ "learning_rate": 4.562272372009163e-05,
+ "loss": 0.4208,
+ "step": 123500
+ },
+ {
+ "epoch": 0.6374438270362861,
+ "grad_norm": 24757.927734375,
+ "learning_rate": 4.5586335001228296e-05,
+ "loss": 0.4167,
+ "step": 123550
+ },
+ {
+ "epoch": 0.6377017970188988,
+ "grad_norm": 22433.091796875,
+ "learning_rate": 4.554994863829387e-05,
+ "loss": 0.4206,
+ "step": 123600
+ },
+ {
+ "epoch": 0.6379597670015117,
+ "grad_norm": 22757.798828125,
+ "learning_rate": 4.5513564650710706e-05,
+ "loss": 0.4113,
+ "step": 123650
+ },
+ {
+ "epoch": 0.6382177369841245,
+ "grad_norm": 22652.9140625,
+ "learning_rate": 4.547718305789984e-05,
+ "loss": 0.4224,
+ "step": 123700
+ },
+ {
+ "epoch": 0.6384757069667374,
+ "grad_norm": 25416.0390625,
+ "learning_rate": 4.5440803879281086e-05,
+ "loss": 0.4129,
+ "step": 123750
+ },
+ {
+ "epoch": 0.6387336769493501,
+ "grad_norm": 22621.40625,
+ "learning_rate": 4.5404427134272926e-05,
+ "loss": 0.4204,
+ "step": 123800
+ },
+ {
+ "epoch": 0.638991646931963,
+ "grad_norm": 24213.93359375,
+ "learning_rate": 4.536805284229258e-05,
+ "loss": 0.4109,
+ "step": 123850
+ },
+ {
+ "epoch": 0.6392496169145758,
+ "grad_norm": 20231.091796875,
+ "learning_rate": 4.5331681022755946e-05,
+ "loss": 0.4221,
+ "step": 123900
+ },
+ {
+ "epoch": 0.6395075868971887,
+ "grad_norm": 22513.21875,
+ "learning_rate": 4.529531169507757e-05,
+ "loss": 0.4189,
+ "step": 123950
+ },
+ {
+ "epoch": 0.6397655568798014,
+ "grad_norm": 19454.783203125,
+ "learning_rate": 4.5258944878670714e-05,
+ "loss": 0.4138,
+ "step": 124000
+ },
+ {
+ "epoch": 0.6400235268624143,
+ "grad_norm": 23547.423828125,
+ "learning_rate": 4.522258059294727e-05,
+ "loss": 0.4206,
+ "step": 124050
+ },
+ {
+ "epoch": 0.6402814968450271,
+ "grad_norm": 23985.0703125,
+ "learning_rate": 4.5186218857317825e-05,
+ "loss": 0.4186,
+ "step": 124100
+ },
+ {
+ "epoch": 0.64053946682764,
+ "grad_norm": 22254.078125,
+ "learning_rate": 4.5149859691191517e-05,
+ "loss": 0.4076,
+ "step": 124150
+ },
+ {
+ "epoch": 0.6407974368102528,
+ "grad_norm": 24060.70703125,
+ "learning_rate": 4.5113503113976194e-05,
+ "loss": 0.4207,
+ "step": 124200
+ },
+ {
+ "epoch": 0.6410554067928655,
+ "grad_norm": 21521.923828125,
+ "learning_rate": 4.5077149145078275e-05,
+ "loss": 0.4134,
+ "step": 124250
+ },
+ {
+ "epoch": 0.6413133767754784,
+ "grad_norm": 22107.48828125,
+ "learning_rate": 4.504079780390282e-05,
+ "loss": 0.4095,
+ "step": 124300
+ },
+ {
+ "epoch": 0.6415713467580912,
+ "grad_norm": 22610.880859375,
+ "learning_rate": 4.5004449109853485e-05,
+ "loss": 0.4216,
+ "step": 124350
+ },
+ {
+ "epoch": 0.6418293167407041,
+ "grad_norm": 22752.83984375,
+ "learning_rate": 4.496810308233247e-05,
+ "loss": 0.4225,
+ "step": 124400
+ },
+ {
+ "epoch": 0.6420872867233168,
+ "grad_norm": 22029.88671875,
+ "learning_rate": 4.4931759740740596e-05,
+ "loss": 0.4138,
+ "step": 124450
+ },
+ {
+ "epoch": 0.6423452567059297,
+ "grad_norm": 24989.2421875,
+ "learning_rate": 4.489541910447722e-05,
+ "loss": 0.4166,
+ "step": 124500
+ },
+ {
+ "epoch": 0.6426032266885425,
+ "grad_norm": 25843.16796875,
+ "learning_rate": 4.485908119294031e-05,
+ "loss": 0.4132,
+ "step": 124550
+ },
+ {
+ "epoch": 0.6428611966711554,
+ "grad_norm": 23847.01171875,
+ "learning_rate": 4.4822746025526286e-05,
+ "loss": 0.4256,
+ "step": 124600
+ },
+ {
+ "epoch": 0.6431191666537681,
+ "grad_norm": 21634.71484375,
+ "learning_rate": 4.478641362163019e-05,
+ "loss": 0.4182,
+ "step": 124650
+ },
+ {
+ "epoch": 0.643377136636381,
+ "grad_norm": 22252.021484375,
+ "learning_rate": 4.475008400064554e-05,
+ "loss": 0.419,
+ "step": 124700
+ },
+ {
+ "epoch": 0.6436351066189938,
+ "grad_norm": 24151.951171875,
+ "learning_rate": 4.471375718196439e-05,
+ "loss": 0.4201,
+ "step": 124750
+ },
+ {
+ "epoch": 0.6438930766016067,
+ "grad_norm": 23570.310546875,
+ "learning_rate": 4.4677433184977315e-05,
+ "loss": 0.4131,
+ "step": 124800
+ },
+ {
+ "epoch": 0.6441510465842195,
+ "grad_norm": 23886.896484375,
+ "learning_rate": 4.464111202907332e-05,
+ "loss": 0.4172,
+ "step": 124850
+ },
+ {
+ "epoch": 0.6444090165668322,
+ "grad_norm": 23476.888671875,
+ "learning_rate": 4.4604793733639973e-05,
+ "loss": 0.419,
+ "step": 124900
+ },
+ {
+ "epoch": 0.6446669865494451,
+ "grad_norm": 22735.759765625,
+ "learning_rate": 4.456847831806324e-05,
+ "loss": 0.4214,
+ "step": 124950
+ },
+ {
+ "epoch": 0.644924956532058,
+ "grad_norm": 25508.525390625,
+ "learning_rate": 4.4532165801727626e-05,
+ "loss": 0.4184,
+ "step": 125000
+ },
+ {
+ "epoch": 0.644924956532058,
+ "eval_loss": 0.40382638573646545,
+ "eval_runtime": 3215.6548,
+ "eval_samples_per_second": 964.382,
+ "eval_steps_per_second": 1.884,
+ "step": 125000
+ },
+ {
+ "epoch": 0.6451829265146708,
+ "grad_norm": 23686.8671875,
+ "learning_rate": 4.449585620401601e-05,
+ "loss": 0.4115,
+ "step": 125050
+ },
+ {
+ "epoch": 0.6454408964972835,
+ "grad_norm": 22472.7421875,
+ "learning_rate": 4.445954954430976e-05,
+ "loss": 0.4187,
+ "step": 125100
+ },
+ {
+ "epoch": 0.6456988664798964,
+ "grad_norm": 25044.5859375,
+ "learning_rate": 4.442324584198871e-05,
+ "loss": 0.4188,
+ "step": 125150
+ },
+ {
+ "epoch": 0.6459568364625092,
+ "grad_norm": 23489.119140625,
+ "learning_rate": 4.4386945116431025e-05,
+ "loss": 0.4212,
+ "step": 125200
+ },
+ {
+ "epoch": 0.6462148064451221,
+ "grad_norm": 23150.12109375,
+ "learning_rate": 4.435064738701335e-05,
+ "loss": 0.4155,
+ "step": 125250
+ },
+ {
+ "epoch": 0.6464727764277348,
+ "grad_norm": 22082.09765625,
+ "learning_rate": 4.4314352673110696e-05,
+ "loss": 0.4208,
+ "step": 125300
+ },
+ {
+ "epoch": 0.6467307464103477,
+ "grad_norm": 23107.71484375,
+ "learning_rate": 4.427806099409652e-05,
+ "loss": 0.4172,
+ "step": 125350
+ },
+ {
+ "epoch": 0.6469887163929605,
+ "grad_norm": 23660.607421875,
+ "learning_rate": 4.4241772369342554e-05,
+ "loss": 0.4156,
+ "step": 125400
+ },
+ {
+ "epoch": 0.6472466863755734,
+ "grad_norm": 22054.47265625,
+ "learning_rate": 4.420548681821901e-05,
+ "loss": 0.4174,
+ "step": 125450
+ },
+ {
+ "epoch": 0.6475046563581862,
+ "grad_norm": 22386.654296875,
+ "learning_rate": 4.416920436009439e-05,
+ "loss": 0.4164,
+ "step": 125500
+ },
+ {
+ "epoch": 0.647762626340799,
+ "grad_norm": 22394.78125,
+ "learning_rate": 4.413292501433557e-05,
+ "loss": 0.4128,
+ "step": 125550
+ },
+ {
+ "epoch": 0.6480205963234118,
+ "grad_norm": 21871.1953125,
+ "learning_rate": 4.4096648800307796e-05,
+ "loss": 0.4174,
+ "step": 125600
+ },
+ {
+ "epoch": 0.6482785663060247,
+ "grad_norm": 21630.826171875,
+ "learning_rate": 4.406037573737456e-05,
+ "loss": 0.4146,
+ "step": 125650
+ },
+ {
+ "epoch": 0.6485365362886375,
+ "grad_norm": 20917.244140625,
+ "learning_rate": 4.4024105844897744e-05,
+ "loss": 0.4172,
+ "step": 125700
+ },
+ {
+ "epoch": 0.6487945062712502,
+ "grad_norm": 21545.53515625,
+ "learning_rate": 4.3987839142237505e-05,
+ "loss": 0.4189,
+ "step": 125750
+ },
+ {
+ "epoch": 0.6490524762538631,
+ "grad_norm": 27708.19140625,
+ "learning_rate": 4.395157564875234e-05,
+ "loss": 0.4127,
+ "step": 125800
+ },
+ {
+ "epoch": 0.6493104462364759,
+ "grad_norm": 23791.052734375,
+ "learning_rate": 4.391531538379895e-05,
+ "loss": 0.4146,
+ "step": 125850
+ },
+ {
+ "epoch": 0.6495684162190888,
+ "grad_norm": 23441.0078125,
+ "learning_rate": 4.387905836673239e-05,
+ "loss": 0.4191,
+ "step": 125900
+ },
+ {
+ "epoch": 0.6498263862017015,
+ "grad_norm": 21998.982421875,
+ "learning_rate": 4.3842804616905944e-05,
+ "loss": 0.4165,
+ "step": 125950
+ },
+ {
+ "epoch": 0.6500843561843144,
+ "grad_norm": 26170.572265625,
+ "learning_rate": 4.380655415367116e-05,
+ "loss": 0.4106,
+ "step": 126000
+ },
+ {
+ "epoch": 0.6503423261669272,
+ "grad_norm": 23915.345703125,
+ "learning_rate": 4.3770306996377866e-05,
+ "loss": 0.417,
+ "step": 126050
+ },
+ {
+ "epoch": 0.6506002961495401,
+ "grad_norm": 22807.23828125,
+ "learning_rate": 4.373406316437404e-05,
+ "loss": 0.4138,
+ "step": 126100
+ },
+ {
+ "epoch": 0.6508582661321528,
+ "grad_norm": 22825.060546875,
+ "learning_rate": 4.369782267700598e-05,
+ "loss": 0.4159,
+ "step": 126150
+ },
+ {
+ "epoch": 0.6511162361147657,
+ "grad_norm": 21670.83984375,
+ "learning_rate": 4.366158555361812e-05,
+ "loss": 0.4131,
+ "step": 126200
+ },
+ {
+ "epoch": 0.6513742060973785,
+ "grad_norm": 24840.630859375,
+ "learning_rate": 4.362535181355319e-05,
+ "loss": 0.4072,
+ "step": 126250
+ },
+ {
+ "epoch": 0.6516321760799914,
+ "grad_norm": 24121.158203125,
+ "learning_rate": 4.358912147615199e-05,
+ "loss": 0.4085,
+ "step": 126300
+ },
+ {
+ "epoch": 0.6518901460626042,
+ "grad_norm": 21738.236328125,
+ "learning_rate": 4.355289456075363e-05,
+ "loss": 0.4154,
+ "step": 126350
+ },
+ {
+ "epoch": 0.6521481160452169,
+ "grad_norm": 24880.833984375,
+ "learning_rate": 4.3516671086695296e-05,
+ "loss": 0.4154,
+ "step": 126400
+ },
+ {
+ "epoch": 0.6524060860278298,
+ "grad_norm": 21572.140625,
+ "learning_rate": 4.348045107331239e-05,
+ "loss": 0.4185,
+ "step": 126450
+ },
+ {
+ "epoch": 0.6526640560104426,
+ "grad_norm": 24076.17578125,
+ "learning_rate": 4.344423453993849e-05,
+ "loss": 0.4132,
+ "step": 126500
+ },
+ {
+ "epoch": 0.6529220259930555,
+ "grad_norm": 23531.365234375,
+ "learning_rate": 4.340802150590522e-05,
+ "loss": 0.4179,
+ "step": 126550
+ },
+ {
+ "epoch": 0.6531799959756682,
+ "grad_norm": 24287.568359375,
+ "learning_rate": 4.337181199054243e-05,
+ "loss": 0.4136,
+ "step": 126600
+ },
+ {
+ "epoch": 0.6534379659582811,
+ "grad_norm": 23352.52734375,
+ "learning_rate": 4.3335606013178046e-05,
+ "loss": 0.4177,
+ "step": 126650
+ },
+ {
+ "epoch": 0.6536959359408939,
+ "grad_norm": 22291.494140625,
+ "learning_rate": 4.3299403593138144e-05,
+ "loss": 0.4155,
+ "step": 126700
+ },
+ {
+ "epoch": 0.6539539059235068,
+ "grad_norm": 20745.798828125,
+ "learning_rate": 4.3263204749746836e-05,
+ "loss": 0.4139,
+ "step": 126750
+ },
+ {
+ "epoch": 0.6542118759061195,
+ "grad_norm": 24670.357421875,
+ "learning_rate": 4.322700950232639e-05,
+ "loss": 0.423,
+ "step": 126800
+ },
+ {
+ "epoch": 0.6544698458887324,
+ "grad_norm": 23067.81640625,
+ "learning_rate": 4.31908178701971e-05,
+ "loss": 0.4174,
+ "step": 126850
+ },
+ {
+ "epoch": 0.6547278158713452,
+ "grad_norm": 25275.47265625,
+ "learning_rate": 4.315462987267739e-05,
+ "loss": 0.4181,
+ "step": 126900
+ },
+ {
+ "epoch": 0.6549857858539581,
+ "grad_norm": 21032.4375,
+ "learning_rate": 4.311844552908372e-05,
+ "loss": 0.4111,
+ "step": 126950
+ },
+ {
+ "epoch": 0.6552437558365709,
+ "grad_norm": 21629.0625,
+ "learning_rate": 4.308226485873056e-05,
+ "loss": 0.4129,
+ "step": 127000
+ },
+ {
+ "epoch": 0.6555017258191836,
+ "grad_norm": 24375.935546875,
+ "learning_rate": 4.3046087880930466e-05,
+ "loss": 0.4129,
+ "step": 127050
+ },
+ {
+ "epoch": 0.6557596958017965,
+ "grad_norm": 21224.63671875,
+ "learning_rate": 4.3009914614994e-05,
+ "loss": 0.4156,
+ "step": 127100
+ },
+ {
+ "epoch": 0.6560176657844093,
+ "grad_norm": 24836.560546875,
+ "learning_rate": 4.297374508022977e-05,
+ "loss": 0.4133,
+ "step": 127150
+ },
+ {
+ "epoch": 0.6562756357670222,
+ "grad_norm": 22769.599609375,
+ "learning_rate": 4.293757929594435e-05,
+ "loss": 0.4151,
+ "step": 127200
+ },
+ {
+ "epoch": 0.6565336057496349,
+ "grad_norm": 22936.603515625,
+ "learning_rate": 4.2901417281442345e-05,
+ "loss": 0.4173,
+ "step": 127250
+ },
+ {
+ "epoch": 0.6567915757322478,
+ "grad_norm": 21296.39453125,
+ "learning_rate": 4.286525905602634e-05,
+ "loss": 0.4121,
+ "step": 127300
+ },
+ {
+ "epoch": 0.6570495457148606,
+ "grad_norm": 24282.591796875,
+ "learning_rate": 4.282910463899689e-05,
+ "loss": 0.4086,
+ "step": 127350
+ },
+ {
+ "epoch": 0.6573075156974735,
+ "grad_norm": 22443.6015625,
+ "learning_rate": 4.2792954049652545e-05,
+ "loss": 0.4183,
+ "step": 127400
+ },
+ {
+ "epoch": 0.6575654856800862,
+ "grad_norm": 21437.98046875,
+ "learning_rate": 4.275680730728976e-05,
+ "loss": 0.4172,
+ "step": 127450
+ },
+ {
+ "epoch": 0.6578234556626991,
+ "grad_norm": 24970.3125,
+ "learning_rate": 4.2720664431202987e-05,
+ "loss": 0.4187,
+ "step": 127500
+ },
+ {
+ "epoch": 0.6580814256453119,
+ "grad_norm": 21128.349609375,
+ "learning_rate": 4.268452544068457e-05,
+ "loss": 0.4142,
+ "step": 127550
+ },
+ {
+ "epoch": 0.6583393956279248,
+ "grad_norm": 26429.14453125,
+ "learning_rate": 4.2648390355024836e-05,
+ "loss": 0.4115,
+ "step": 127600
+ },
+ {
+ "epoch": 0.6585973656105376,
+ "grad_norm": 22542.380859375,
+ "learning_rate": 4.261225919351195e-05,
+ "loss": 0.4144,
+ "step": 127650
+ },
+ {
+ "epoch": 0.6588553355931503,
+ "grad_norm": 23179.853515625,
+ "learning_rate": 4.257613197543207e-05,
+ "loss": 0.4164,
+ "step": 127700
+ },
+ {
+ "epoch": 0.6591133055757632,
+ "grad_norm": 24641.048828125,
+ "learning_rate": 4.254000872006918e-05,
+ "loss": 0.4175,
+ "step": 127750
+ },
+ {
+ "epoch": 0.659371275558376,
+ "grad_norm": 23836.771484375,
+ "learning_rate": 4.250388944670517e-05,
+ "loss": 0.4201,
+ "step": 127800
+ },
+ {
+ "epoch": 0.6596292455409889,
+ "grad_norm": 23714.7578125,
+ "learning_rate": 4.2467774174619836e-05,
+ "loss": 0.4102,
+ "step": 127850
+ },
+ {
+ "epoch": 0.6598872155236016,
+ "grad_norm": 23630.2890625,
+ "learning_rate": 4.2431662923090785e-05,
+ "loss": 0.411,
+ "step": 127900
+ },
+ {
+ "epoch": 0.6601451855062145,
+ "grad_norm": 23018.384765625,
+ "learning_rate": 4.239555571139353e-05,
+ "loss": 0.4113,
+ "step": 127950
+ },
+ {
+ "epoch": 0.6604031554888273,
+ "grad_norm": 23594.041015625,
+ "learning_rate": 4.235945255880137e-05,
+ "loss": 0.4153,
+ "step": 128000
+ },
+ {
+ "epoch": 0.6606611254714402,
+ "grad_norm": 24231.07421875,
+ "learning_rate": 4.232335348458549e-05,
+ "loss": 0.4159,
+ "step": 128050
+ },
+ {
+ "epoch": 0.6609190954540529,
+ "grad_norm": 22362.98828125,
+ "learning_rate": 4.228725850801486e-05,
+ "loss": 0.4218,
+ "step": 128100
+ },
+ {
+ "epoch": 0.6611770654366658,
+ "grad_norm": 23008.44140625,
+ "learning_rate": 4.225116764835631e-05,
+ "loss": 0.416,
+ "step": 128150
+ },
+ {
+ "epoch": 0.6614350354192786,
+ "grad_norm": 23027.1875,
+ "learning_rate": 4.221508092487441e-05,
+ "loss": 0.4163,
+ "step": 128200
+ },
+ {
+ "epoch": 0.6616930054018915,
+ "grad_norm": 25121.61328125,
+ "learning_rate": 4.2178998356831553e-05,
+ "loss": 0.4167,
+ "step": 128250
+ },
+ {
+ "epoch": 0.6619509753845043,
+ "grad_norm": 24767.4140625,
+ "learning_rate": 4.214291996348794e-05,
+ "loss": 0.4176,
+ "step": 128300
+ },
+ {
+ "epoch": 0.662208945367117,
+ "grad_norm": 24596.533203125,
+ "learning_rate": 4.210684576410151e-05,
+ "loss": 0.4183,
+ "step": 128350
+ },
+ {
+ "epoch": 0.6624669153497299,
+ "grad_norm": 21095.8671875,
+ "learning_rate": 4.2070775777927976e-05,
+ "loss": 0.4151,
+ "step": 128400
+ },
+ {
+ "epoch": 0.6627248853323428,
+ "grad_norm": 25389.1640625,
+ "learning_rate": 4.203471002422077e-05,
+ "loss": 0.4226,
+ "step": 128450
+ },
+ {
+ "epoch": 0.6629828553149556,
+ "grad_norm": 24613.94921875,
+ "learning_rate": 4.199864852223113e-05,
+ "loss": 0.4093,
+ "step": 128500
+ },
+ {
+ "epoch": 0.6632408252975683,
+ "grad_norm": 23665.59765625,
+ "learning_rate": 4.196259129120796e-05,
+ "loss": 0.4135,
+ "step": 128550
+ },
+ {
+ "epoch": 0.6634987952801812,
+ "grad_norm": 22946.5234375,
+ "learning_rate": 4.192653835039795e-05,
+ "loss": 0.4151,
+ "step": 128600
+ },
+ {
+ "epoch": 0.663756765262794,
+ "grad_norm": 22438.23046875,
+ "learning_rate": 4.189048971904541e-05,
+ "loss": 0.4064,
+ "step": 128650
+ },
+ {
+ "epoch": 0.6640147352454069,
+ "grad_norm": 22760.623046875,
+ "learning_rate": 4.185444541639243e-05,
+ "loss": 0.4084,
+ "step": 128700
+ },
+ {
+ "epoch": 0.6642727052280196,
+ "grad_norm": 25223.484375,
+ "learning_rate": 4.1818405461678763e-05,
+ "loss": 0.4151,
+ "step": 128750
+ },
+ {
+ "epoch": 0.6645306752106325,
+ "grad_norm": 31547.962890625,
+ "learning_rate": 4.178236987414182e-05,
+ "loss": 0.4115,
+ "step": 128800
+ },
+ {
+ "epoch": 0.6647886451932453,
+ "grad_norm": 19114.953125,
+ "learning_rate": 4.174633867301674e-05,
+ "loss": 0.4109,
+ "step": 128850
+ },
+ {
+ "epoch": 0.6650466151758582,
+ "grad_norm": 22819.888671875,
+ "learning_rate": 4.1710311877536226e-05,
+ "loss": 0.4123,
+ "step": 128900
+ },
+ {
+ "epoch": 0.6653045851584709,
+ "grad_norm": 22868.62890625,
+ "learning_rate": 4.167428950693073e-05,
+ "loss": 0.413,
+ "step": 128950
+ },
+ {
+ "epoch": 0.6655625551410838,
+ "grad_norm": 23062.359375,
+ "learning_rate": 4.163827158042826e-05,
+ "loss": 0.4152,
+ "step": 129000
+ },
+ {
+ "epoch": 0.6658205251236966,
+ "grad_norm": 25990.505859375,
+ "learning_rate": 4.160225811725453e-05,
+ "loss": 0.4176,
+ "step": 129050
+ },
+ {
+ "epoch": 0.6660784951063095,
+ "grad_norm": 21594.1953125,
+ "learning_rate": 4.156624913663279e-05,
+ "loss": 0.4136,
+ "step": 129100
+ },
+ {
+ "epoch": 0.6663364650889223,
+ "grad_norm": 21145.869140625,
+ "learning_rate": 4.153024465778393e-05,
+ "loss": 0.4216,
+ "step": 129150
+ },
+ {
+ "epoch": 0.666594435071535,
+ "grad_norm": 22634.7734375,
+ "learning_rate": 4.149424469992649e-05,
+ "loss": 0.4114,
+ "step": 129200
+ },
+ {
+ "epoch": 0.6668524050541479,
+ "grad_norm": 23526.46875,
+ "learning_rate": 4.145824928227652e-05,
+ "loss": 0.4217,
+ "step": 129250
+ },
+ {
+ "epoch": 0.6671103750367607,
+ "grad_norm": 22295.880859375,
+ "learning_rate": 4.142225842404769e-05,
+ "loss": 0.4169,
+ "step": 129300
+ },
+ {
+ "epoch": 0.6673683450193736,
+ "grad_norm": 22282.421875,
+ "learning_rate": 4.13862721444512e-05,
+ "loss": 0.4195,
+ "step": 129350
+ },
+ {
+ "epoch": 0.6676263150019863,
+ "grad_norm": 21856.337890625,
+ "learning_rate": 4.135029046269585e-05,
+ "loss": 0.4229,
+ "step": 129400
+ },
+ {
+ "epoch": 0.6678842849845992,
+ "grad_norm": 20999.04296875,
+ "learning_rate": 4.131431339798796e-05,
+ "loss": 0.4168,
+ "step": 129450
+ },
+ {
+ "epoch": 0.668142254967212,
+ "grad_norm": 24684.484375,
+ "learning_rate": 4.12783409695314e-05,
+ "loss": 0.4117,
+ "step": 129500
+ },
+ {
+ "epoch": 0.6684002249498249,
+ "grad_norm": 24120.349609375,
+ "learning_rate": 4.124237319652753e-05,
+ "loss": 0.4186,
+ "step": 129550
+ },
+ {
+ "epoch": 0.6686581949324376,
+ "grad_norm": 23283.736328125,
+ "learning_rate": 4.1206410098175265e-05,
+ "loss": 0.4176,
+ "step": 129600
+ },
+ {
+ "epoch": 0.6689161649150505,
+ "grad_norm": 21902.6875,
+ "learning_rate": 4.117045169367102e-05,
+ "loss": 0.4153,
+ "step": 129650
+ },
+ {
+ "epoch": 0.6691741348976633,
+ "grad_norm": 22762.6015625,
+ "learning_rate": 4.1134498002208674e-05,
+ "loss": 0.414,
+ "step": 129700
+ },
+ {
+ "epoch": 0.6694321048802762,
+ "grad_norm": 20947.083984375,
+ "learning_rate": 4.109854904297965e-05,
+ "loss": 0.4113,
+ "step": 129750
+ },
+ {
+ "epoch": 0.669690074862889,
+ "grad_norm": 24687.189453125,
+ "learning_rate": 4.106260483517276e-05,
+ "loss": 0.4207,
+ "step": 129800
+ },
+ {
+ "epoch": 0.6699480448455017,
+ "grad_norm": 24164.724609375,
+ "learning_rate": 4.102666539797435e-05,
+ "loss": 0.4116,
+ "step": 129850
+ },
+ {
+ "epoch": 0.6702060148281146,
+ "grad_norm": 23408.68359375,
+ "learning_rate": 4.099073075056818e-05,
+ "loss": 0.4181,
+ "step": 129900
+ },
+ {
+ "epoch": 0.6704639848107274,
+ "grad_norm": 22822.3515625,
+ "learning_rate": 4.0954800912135516e-05,
+ "loss": 0.4176,
+ "step": 129950
+ },
+ {
+ "epoch": 0.6707219547933403,
+ "grad_norm": 21576.173828125,
+ "learning_rate": 4.091887590185494e-05,
+ "loss": 0.4165,
+ "step": 130000
+ },
+ {
+ "epoch": 0.6707219547933403,
+ "eval_loss": 0.40186887979507446,
+ "eval_runtime": 3150.7117,
+ "eval_samples_per_second": 984.26,
+ "eval_steps_per_second": 1.922,
+ "step": 130000
+ },
+ {
+ "epoch": 0.670979924775953,
+ "grad_norm": 21987.3671875,
+ "learning_rate": 4.0882955738902576e-05,
+ "loss": 0.4176,
+ "step": 130050
+ },
+ {
+ "epoch": 0.6712378947585659,
+ "grad_norm": 23900.74609375,
+ "learning_rate": 4.0847040442451895e-05,
+ "loss": 0.4183,
+ "step": 130100
+ },
+ {
+ "epoch": 0.6714958647411787,
+ "grad_norm": 22624.236328125,
+ "learning_rate": 4.081113003167378e-05,
+ "loss": 0.4146,
+ "step": 130150
+ },
+ {
+ "epoch": 0.6717538347237916,
+ "grad_norm": 22636.490234375,
+ "learning_rate": 4.0775224525736546e-05,
+ "loss": 0.4107,
+ "step": 130200
+ },
+ {
+ "epoch": 0.6720118047064043,
+ "grad_norm": 22667.66796875,
+ "learning_rate": 4.07393239438058e-05,
+ "loss": 0.4151,
+ "step": 130250
+ },
+ {
+ "epoch": 0.6722697746890172,
+ "grad_norm": 20381.720703125,
+ "learning_rate": 4.070342830504465e-05,
+ "loss": 0.4167,
+ "step": 130300
+ },
+ {
+ "epoch": 0.67252774467163,
+ "grad_norm": 22913.248046875,
+ "learning_rate": 4.0667537628613424e-05,
+ "loss": 0.4116,
+ "step": 130350
+ },
+ {
+ "epoch": 0.6727857146542429,
+ "grad_norm": 23168.865234375,
+ "learning_rate": 4.063165193366992e-05,
+ "loss": 0.413,
+ "step": 130400
+ },
+ {
+ "epoch": 0.6730436846368557,
+ "grad_norm": 21597.861328125,
+ "learning_rate": 4.059577123936918e-05,
+ "loss": 0.4179,
+ "step": 130450
+ },
+ {
+ "epoch": 0.6733016546194684,
+ "grad_norm": 20305.806640625,
+ "learning_rate": 4.055989556486365e-05,
+ "loss": 0.4199,
+ "step": 130500
+ },
+ {
+ "epoch": 0.6735596246020813,
+ "grad_norm": 23520.173828125,
+ "learning_rate": 4.052402492930311e-05,
+ "loss": 0.4154,
+ "step": 130550
+ },
+ {
+ "epoch": 0.6738175945846941,
+ "grad_norm": 23356.85546875,
+ "learning_rate": 4.048815935183453e-05,
+ "loss": 0.4154,
+ "step": 130600
+ },
+ {
+ "epoch": 0.674075564567307,
+ "grad_norm": 22958.611328125,
+ "learning_rate": 4.0452298851602324e-05,
+ "loss": 0.4149,
+ "step": 130650
+ },
+ {
+ "epoch": 0.6743335345499197,
+ "grad_norm": 24888.25390625,
+ "learning_rate": 4.04164434477481e-05,
+ "loss": 0.4166,
+ "step": 130700
+ },
+ {
+ "epoch": 0.6745915045325326,
+ "grad_norm": 22958.189453125,
+ "learning_rate": 4.0380593159410806e-05,
+ "loss": 0.4159,
+ "step": 130750
+ },
+ {
+ "epoch": 0.6748494745151454,
+ "grad_norm": 21863.55859375,
+ "learning_rate": 4.03447480057266e-05,
+ "loss": 0.4142,
+ "step": 130800
+ },
+ {
+ "epoch": 0.6751074444977583,
+ "grad_norm": 23096.375,
+ "learning_rate": 4.030890800582895e-05,
+ "loss": 0.4108,
+ "step": 130850
+ },
+ {
+ "epoch": 0.675365414480371,
+ "grad_norm": 23506.576171875,
+ "learning_rate": 4.027307317884854e-05,
+ "loss": 0.4111,
+ "step": 130900
+ },
+ {
+ "epoch": 0.6756233844629839,
+ "grad_norm": 26913.11328125,
+ "learning_rate": 4.023724354391331e-05,
+ "loss": 0.4145,
+ "step": 130950
+ },
+ {
+ "epoch": 0.6758813544455967,
+ "grad_norm": 22008.958984375,
+ "learning_rate": 4.020141912014846e-05,
+ "loss": 0.4118,
+ "step": 131000
+ },
+ {
+ "epoch": 0.6761393244282096,
+ "grad_norm": 21431.857421875,
+ "learning_rate": 4.016559992667632e-05,
+ "loss": 0.417,
+ "step": 131050
+ },
+ {
+ "epoch": 0.6763972944108223,
+ "grad_norm": 24077.453125,
+ "learning_rate": 4.0129785982616524e-05,
+ "loss": 0.4121,
+ "step": 131100
+ },
+ {
+ "epoch": 0.6766552643934352,
+ "grad_norm": 22978.5390625,
+ "learning_rate": 4.009397730708583e-05,
+ "loss": 0.4074,
+ "step": 131150
+ },
+ {
+ "epoch": 0.676913234376048,
+ "grad_norm": 25474.740234375,
+ "learning_rate": 4.005817391919826e-05,
+ "loss": 0.4159,
+ "step": 131200
+ },
+ {
+ "epoch": 0.6771712043586608,
+ "grad_norm": 23532.416015625,
+ "learning_rate": 4.0022375838064904e-05,
+ "loss": 0.4202,
+ "step": 131250
+ },
+ {
+ "epoch": 0.6774291743412737,
+ "grad_norm": 23746.072265625,
+ "learning_rate": 3.998658308279414e-05,
+ "loss": 0.4157,
+ "step": 131300
+ },
+ {
+ "epoch": 0.6776871443238864,
+ "grad_norm": 21691.6875,
+ "learning_rate": 3.995079567249142e-05,
+ "loss": 0.4158,
+ "step": 131350
+ },
+ {
+ "epoch": 0.6779451143064993,
+ "grad_norm": 24167.923828125,
+ "learning_rate": 3.991501362625937e-05,
+ "loss": 0.4165,
+ "step": 131400
+ },
+ {
+ "epoch": 0.6782030842891121,
+ "grad_norm": 22420.27734375,
+ "learning_rate": 3.9879236963197784e-05,
+ "loss": 0.418,
+ "step": 131450
+ },
+ {
+ "epoch": 0.678461054271725,
+ "grad_norm": 22116.75,
+ "learning_rate": 3.984346570240352e-05,
+ "loss": 0.4152,
+ "step": 131500
+ },
+ {
+ "epoch": 0.6787190242543377,
+ "grad_norm": 23841.001953125,
+ "learning_rate": 3.9807699862970596e-05,
+ "loss": 0.4179,
+ "step": 131550
+ },
+ {
+ "epoch": 0.6789769942369506,
+ "grad_norm": 22931.126953125,
+ "learning_rate": 3.977193946399011e-05,
+ "loss": 0.4171,
+ "step": 131600
+ },
+ {
+ "epoch": 0.6792349642195634,
+ "grad_norm": 24939.294921875,
+ "learning_rate": 3.973618452455031e-05,
+ "loss": 0.4147,
+ "step": 131650
+ },
+ {
+ "epoch": 0.6794929342021763,
+ "grad_norm": 22026.615234375,
+ "learning_rate": 3.970043506373644e-05,
+ "loss": 0.4084,
+ "step": 131700
+ },
+ {
+ "epoch": 0.679750904184789,
+ "grad_norm": 24636.595703125,
+ "learning_rate": 3.9664691100630904e-05,
+ "loss": 0.4137,
+ "step": 131750
+ },
+ {
+ "epoch": 0.6800088741674019,
+ "grad_norm": 25599.443359375,
+ "learning_rate": 3.962895265431311e-05,
+ "loss": 0.4167,
+ "step": 131800
+ },
+ {
+ "epoch": 0.6802668441500147,
+ "grad_norm": 23514.0078125,
+ "learning_rate": 3.9593219743859575e-05,
+ "loss": 0.408,
+ "step": 131850
+ },
+ {
+ "epoch": 0.6805248141326276,
+ "grad_norm": 21798.9609375,
+ "learning_rate": 3.9557492388343844e-05,
+ "loss": 0.4129,
+ "step": 131900
+ },
+ {
+ "epoch": 0.6807827841152404,
+ "grad_norm": 24803.248046875,
+ "learning_rate": 3.952177060683644e-05,
+ "loss": 0.4126,
+ "step": 131950
+ },
+ {
+ "epoch": 0.6810407540978531,
+ "grad_norm": 23215.529296875,
+ "learning_rate": 3.948605441840501e-05,
+ "loss": 0.4114,
+ "step": 132000
+ },
+ {
+ "epoch": 0.681298724080466,
+ "grad_norm": 21179.626953125,
+ "learning_rate": 3.945034384211412e-05,
+ "loss": 0.4139,
+ "step": 132050
+ },
+ {
+ "epoch": 0.6815566940630788,
+ "grad_norm": 22894.04296875,
+ "learning_rate": 3.941463889702543e-05,
+ "loss": 0.4144,
+ "step": 132100
+ },
+ {
+ "epoch": 0.6818146640456917,
+ "grad_norm": 22581.392578125,
+ "learning_rate": 3.937893960219751e-05,
+ "loss": 0.4163,
+ "step": 132150
+ },
+ {
+ "epoch": 0.6820726340283044,
+ "grad_norm": 27557.634765625,
+ "learning_rate": 3.9343245976685966e-05,
+ "loss": 0.4194,
+ "step": 132200
+ },
+ {
+ "epoch": 0.6823306040109173,
+ "grad_norm": 24157.97265625,
+ "learning_rate": 3.9307558039543355e-05,
+ "loss": 0.4089,
+ "step": 132250
+ },
+ {
+ "epoch": 0.6825885739935301,
+ "grad_norm": 23363.904296875,
+ "learning_rate": 3.927187580981922e-05,
+ "loss": 0.4108,
+ "step": 132300
+ },
+ {
+ "epoch": 0.682846543976143,
+ "grad_norm": 24005.15625,
+ "learning_rate": 3.9236199306560054e-05,
+ "loss": 0.4103,
+ "step": 132350
+ },
+ {
+ "epoch": 0.6831045139587557,
+ "grad_norm": 23476.4609375,
+ "learning_rate": 3.920052854880925e-05,
+ "loss": 0.4189,
+ "step": 132400
+ },
+ {
+ "epoch": 0.6833624839413686,
+ "grad_norm": 23734.173828125,
+ "learning_rate": 3.91648635556072e-05,
+ "loss": 0.4183,
+ "step": 132450
+ },
+ {
+ "epoch": 0.6836204539239814,
+ "grad_norm": 22112.642578125,
+ "learning_rate": 3.912920434599117e-05,
+ "loss": 0.4139,
+ "step": 132500
+ },
+ {
+ "epoch": 0.6838784239065943,
+ "grad_norm": 23442.96484375,
+ "learning_rate": 3.909355093899537e-05,
+ "loss": 0.4137,
+ "step": 132550
+ },
+ {
+ "epoch": 0.6841363938892071,
+ "grad_norm": 22873.734375,
+ "learning_rate": 3.905790335365087e-05,
+ "loss": 0.4097,
+ "step": 132600
+ },
+ {
+ "epoch": 0.6843943638718198,
+ "grad_norm": 24382.9140625,
+ "learning_rate": 3.902226160898567e-05,
+ "loss": 0.4134,
+ "step": 132650
+ },
+ {
+ "epoch": 0.6846523338544327,
+ "grad_norm": 23238.1953125,
+ "learning_rate": 3.898662572402468e-05,
+ "loss": 0.4137,
+ "step": 132700
+ },
+ {
+ "epoch": 0.6849103038370455,
+ "grad_norm": 21690.37890625,
+ "learning_rate": 3.89509957177896e-05,
+ "loss": 0.4114,
+ "step": 132750
+ },
+ {
+ "epoch": 0.6851682738196584,
+ "grad_norm": 25762.189453125,
+ "learning_rate": 3.891537160929907e-05,
+ "loss": 0.4134,
+ "step": 132800
+ },
+ {
+ "epoch": 0.6854262438022711,
+ "grad_norm": 22006.044921875,
+ "learning_rate": 3.88797534175685e-05,
+ "loss": 0.4132,
+ "step": 132850
+ },
+ {
+ "epoch": 0.685684213784884,
+ "grad_norm": 22149.5546875,
+ "learning_rate": 3.8844141161610256e-05,
+ "loss": 0.4154,
+ "step": 132900
+ },
+ {
+ "epoch": 0.6859421837674968,
+ "grad_norm": 23865.419921875,
+ "learning_rate": 3.880853486043343e-05,
+ "loss": 0.4135,
+ "step": 132950
+ },
+ {
+ "epoch": 0.6862001537501097,
+ "grad_norm": 22708.126953125,
+ "learning_rate": 3.877293453304399e-05,
+ "loss": 0.4143,
+ "step": 133000
+ },
+ {
+ "epoch": 0.6864581237327224,
+ "grad_norm": 19948.517578125,
+ "learning_rate": 3.8737340198444683e-05,
+ "loss": 0.4181,
+ "step": 133050
+ },
+ {
+ "epoch": 0.6867160937153353,
+ "grad_norm": 22594.826171875,
+ "learning_rate": 3.870175187563509e-05,
+ "loss": 0.4108,
+ "step": 133100
+ },
+ {
+ "epoch": 0.6869740636979481,
+ "grad_norm": 24876.56640625,
+ "learning_rate": 3.866616958361159e-05,
+ "loss": 0.4136,
+ "step": 133150
+ },
+ {
+ "epoch": 0.687232033680561,
+ "grad_norm": 20055.0859375,
+ "learning_rate": 3.8630593341367285e-05,
+ "loss": 0.4176,
+ "step": 133200
+ },
+ {
+ "epoch": 0.6874900036631737,
+ "grad_norm": 24807.9140625,
+ "learning_rate": 3.8595023167892096e-05,
+ "loss": 0.4084,
+ "step": 133250
+ },
+ {
+ "epoch": 0.6877479736457865,
+ "grad_norm": 21060.78125,
+ "learning_rate": 3.8559459082172696e-05,
+ "loss": 0.4086,
+ "step": 133300
+ },
+ {
+ "epoch": 0.6880059436283994,
+ "grad_norm": 22740.255859375,
+ "learning_rate": 3.852390110319252e-05,
+ "loss": 0.4109,
+ "step": 133350
+ },
+ {
+ "epoch": 0.6882639136110122,
+ "grad_norm": 24095.68359375,
+ "learning_rate": 3.848834924993169e-05,
+ "loss": 0.4118,
+ "step": 133400
+ },
+ {
+ "epoch": 0.6885218835936251,
+ "grad_norm": 20011.78125,
+ "learning_rate": 3.8452803541367136e-05,
+ "loss": 0.4133,
+ "step": 133450
+ },
+ {
+ "epoch": 0.6887798535762378,
+ "grad_norm": 21369.7265625,
+ "learning_rate": 3.8417263996472444e-05,
+ "loss": 0.4104,
+ "step": 133500
+ },
+ {
+ "epoch": 0.6890378235588507,
+ "grad_norm": 22532.251953125,
+ "learning_rate": 3.8381730634217946e-05,
+ "loss": 0.415,
+ "step": 133550
+ },
+ {
+ "epoch": 0.6892957935414635,
+ "grad_norm": 21174.34765625,
+ "learning_rate": 3.8346203473570677e-05,
+ "loss": 0.4121,
+ "step": 133600
+ },
+ {
+ "epoch": 0.6895537635240764,
+ "grad_norm": 21758.87109375,
+ "learning_rate": 3.831068253349431e-05,
+ "loss": 0.4181,
+ "step": 133650
+ },
+ {
+ "epoch": 0.6898117335066891,
+ "grad_norm": 21809.083984375,
+ "learning_rate": 3.827516783294927e-05,
+ "loss": 0.41,
+ "step": 133700
+ },
+ {
+ "epoch": 0.690069703489302,
+ "grad_norm": 21419.69921875,
+ "learning_rate": 3.8239659390892593e-05,
+ "loss": 0.4166,
+ "step": 133750
+ },
+ {
+ "epoch": 0.6903276734719148,
+ "grad_norm": 20746.517578125,
+ "learning_rate": 3.820415722627802e-05,
+ "loss": 0.4168,
+ "step": 133800
+ },
+ {
+ "epoch": 0.6905856434545277,
+ "grad_norm": 22737.89453125,
+ "learning_rate": 3.816866135805589e-05,
+ "loss": 0.4119,
+ "step": 133850
+ },
+ {
+ "epoch": 0.6908436134371404,
+ "grad_norm": 23691.408203125,
+ "learning_rate": 3.813317180517324e-05,
+ "loss": 0.4105,
+ "step": 133900
+ },
+ {
+ "epoch": 0.6911015834197533,
+ "grad_norm": 22899.70703125,
+ "learning_rate": 3.8097688586573684e-05,
+ "loss": 0.412,
+ "step": 133950
+ },
+ {
+ "epoch": 0.6913595534023661,
+ "grad_norm": 25553.763671875,
+ "learning_rate": 3.8062211721197475e-05,
+ "loss": 0.4158,
+ "step": 134000
+ },
+ {
+ "epoch": 0.691617523384979,
+ "grad_norm": 22099.93359375,
+ "learning_rate": 3.802674122798152e-05,
+ "loss": 0.4149,
+ "step": 134050
+ },
+ {
+ "epoch": 0.6918754933675918,
+ "grad_norm": 25735.91015625,
+ "learning_rate": 3.799127712585922e-05,
+ "loss": 0.4058,
+ "step": 134100
+ },
+ {
+ "epoch": 0.6921334633502045,
+ "grad_norm": 21259.95703125,
+ "learning_rate": 3.795581943376067e-05,
+ "loss": 0.4192,
+ "step": 134150
+ },
+ {
+ "epoch": 0.6923914333328174,
+ "grad_norm": 22438.23046875,
+ "learning_rate": 3.7920368170612476e-05,
+ "loss": 0.414,
+ "step": 134200
+ },
+ {
+ "epoch": 0.6926494033154302,
+ "grad_norm": 24721.974609375,
+ "learning_rate": 3.788492335533786e-05,
+ "loss": 0.4154,
+ "step": 134250
+ },
+ {
+ "epoch": 0.6929073732980431,
+ "grad_norm": 24267.611328125,
+ "learning_rate": 3.7849485006856545e-05,
+ "loss": 0.4108,
+ "step": 134300
+ },
+ {
+ "epoch": 0.6931653432806558,
+ "grad_norm": 25588.193359375,
+ "learning_rate": 3.781405314408486e-05,
+ "loss": 0.4169,
+ "step": 134350
+ },
+ {
+ "epoch": 0.6934233132632687,
+ "grad_norm": 22651.216796875,
+ "learning_rate": 3.7778627785935626e-05,
+ "loss": 0.4112,
+ "step": 134400
+ },
+ {
+ "epoch": 0.6936812832458815,
+ "grad_norm": 24765.76953125,
+ "learning_rate": 3.774320895131823e-05,
+ "loss": 0.4173,
+ "step": 134450
+ },
+ {
+ "epoch": 0.6939392532284944,
+ "grad_norm": 25384.44921875,
+ "learning_rate": 3.7707796659138584e-05,
+ "loss": 0.4097,
+ "step": 134500
+ },
+ {
+ "epoch": 0.6941972232111071,
+ "grad_norm": 21145.587890625,
+ "learning_rate": 3.767239092829903e-05,
+ "loss": 0.4125,
+ "step": 134550
+ },
+ {
+ "epoch": 0.69445519319372,
+ "grad_norm": 22693.28515625,
+ "learning_rate": 3.763699177769849e-05,
+ "loss": 0.4111,
+ "step": 134600
+ },
+ {
+ "epoch": 0.6947131631763328,
+ "grad_norm": 20415.33984375,
+ "learning_rate": 3.760159922623235e-05,
+ "loss": 0.4178,
+ "step": 134650
+ },
+ {
+ "epoch": 0.6949711331589457,
+ "grad_norm": 23304.33984375,
+ "learning_rate": 3.756621329279247e-05,
+ "loss": 0.4142,
+ "step": 134700
+ },
+ {
+ "epoch": 0.6952291031415585,
+ "grad_norm": 22485.029296875,
+ "learning_rate": 3.7530833996267156e-05,
+ "loss": 0.4129,
+ "step": 134750
+ },
+ {
+ "epoch": 0.6954870731241712,
+ "grad_norm": 20506.5625,
+ "learning_rate": 3.7495461355541206e-05,
+ "loss": 0.4104,
+ "step": 134800
+ },
+ {
+ "epoch": 0.6957450431067841,
+ "grad_norm": 26106.26953125,
+ "learning_rate": 3.746009538949584e-05,
+ "loss": 0.4122,
+ "step": 134850
+ },
+ {
+ "epoch": 0.6960030130893969,
+ "grad_norm": 25230.55859375,
+ "learning_rate": 3.742473611700874e-05,
+ "loss": 0.4173,
+ "step": 134900
+ },
+ {
+ "epoch": 0.6962609830720098,
+ "grad_norm": 23462.197265625,
+ "learning_rate": 3.738938355695402e-05,
+ "loss": 0.4211,
+ "step": 134950
+ },
+ {
+ "epoch": 0.6965189530546225,
+ "grad_norm": 22550.8359375,
+ "learning_rate": 3.735403772820213e-05,
+ "loss": 0.4154,
+ "step": 135000
+ },
+ {
+ "epoch": 0.6965189530546225,
+ "eval_loss": 0.399837851524353,
+ "eval_runtime": 3136.0222,
+ "eval_samples_per_second": 988.871,
+ "eval_steps_per_second": 1.931,
+ "step": 135000
+ },
+ {
+ "epoch": 0.6967769230372354,
+ "grad_norm": 22235.15234375,
+ "learning_rate": 3.731869864962004e-05,
+ "loss": 0.4183,
+ "step": 135050
+ },
+ {
+ "epoch": 0.6970348930198482,
+ "grad_norm": 21969.208984375,
+ "learning_rate": 3.728336634007105e-05,
+ "loss": 0.41,
+ "step": 135100
+ },
+ {
+ "epoch": 0.6972928630024611,
+ "grad_norm": 22907.32421875,
+ "learning_rate": 3.724804081841488e-05,
+ "loss": 0.4213,
+ "step": 135150
+ },
+ {
+ "epoch": 0.6975508329850738,
+ "grad_norm": 22994.646484375,
+ "learning_rate": 3.721272210350757e-05,
+ "loss": 0.4103,
+ "step": 135200
+ },
+ {
+ "epoch": 0.6978088029676867,
+ "grad_norm": 22118.224609375,
+ "learning_rate": 3.717741021420162e-05,
+ "loss": 0.4195,
+ "step": 135250
+ },
+ {
+ "epoch": 0.6980667729502995,
+ "grad_norm": 19673.6484375,
+ "learning_rate": 3.7142105169345764e-05,
+ "loss": 0.4105,
+ "step": 135300
+ },
+ {
+ "epoch": 0.6983247429329124,
+ "grad_norm": 23110.041015625,
+ "learning_rate": 3.71068069877852e-05,
+ "loss": 0.4132,
+ "step": 135350
+ },
+ {
+ "epoch": 0.6985827129155251,
+ "grad_norm": 26589.453125,
+ "learning_rate": 3.707151568836144e-05,
+ "loss": 0.4171,
+ "step": 135400
+ },
+ {
+ "epoch": 0.6988406828981379,
+ "grad_norm": 25272.74609375,
+ "learning_rate": 3.7036231289912206e-05,
+ "loss": 0.4098,
+ "step": 135450
+ },
+ {
+ "epoch": 0.6990986528807508,
+ "grad_norm": 23238.626953125,
+ "learning_rate": 3.700095381127172e-05,
+ "loss": 0.4102,
+ "step": 135500
+ },
+ {
+ "epoch": 0.6993566228633636,
+ "grad_norm": 25412.8203125,
+ "learning_rate": 3.696568327127036e-05,
+ "loss": 0.4131,
+ "step": 135550
+ },
+ {
+ "epoch": 0.6996145928459765,
+ "grad_norm": 22329.0703125,
+ "learning_rate": 3.693041968873488e-05,
+ "loss": 0.4196,
+ "step": 135600
+ },
+ {
+ "epoch": 0.6998725628285892,
+ "grad_norm": 23497.068359375,
+ "learning_rate": 3.6895163082488294e-05,
+ "loss": 0.4137,
+ "step": 135650
+ },
+ {
+ "epoch": 0.7001305328112021,
+ "grad_norm": 23415.0859375,
+ "learning_rate": 3.6859913471349906e-05,
+ "loss": 0.4088,
+ "step": 135700
+ },
+ {
+ "epoch": 0.7003885027938149,
+ "grad_norm": 24474.064453125,
+ "learning_rate": 3.682467087413525e-05,
+ "loss": 0.4122,
+ "step": 135750
+ },
+ {
+ "epoch": 0.7006464727764278,
+ "grad_norm": 24427.3359375,
+ "learning_rate": 3.678943530965615e-05,
+ "loss": 0.4133,
+ "step": 135800
+ },
+ {
+ "epoch": 0.7009044427590405,
+ "grad_norm": 24399.58203125,
+ "learning_rate": 3.675420679672068e-05,
+ "loss": 0.4113,
+ "step": 135850
+ },
+ {
+ "epoch": 0.7011624127416534,
+ "grad_norm": 22070.033203125,
+ "learning_rate": 3.671898535413313e-05,
+ "loss": 0.4099,
+ "step": 135900
+ },
+ {
+ "epoch": 0.7014203827242662,
+ "grad_norm": 21846.20703125,
+ "learning_rate": 3.668377100069404e-05,
+ "loss": 0.4164,
+ "step": 135950
+ },
+ {
+ "epoch": 0.7016783527068791,
+ "grad_norm": 21927.2265625,
+ "learning_rate": 3.664856375520012e-05,
+ "loss": 0.4124,
+ "step": 136000
+ },
+ {
+ "epoch": 0.7019363226894918,
+ "grad_norm": 22155.341796875,
+ "learning_rate": 3.6613363636444344e-05,
+ "loss": 0.416,
+ "step": 136050
+ },
+ {
+ "epoch": 0.7021942926721046,
+ "grad_norm": 23344.486328125,
+ "learning_rate": 3.6578170663215826e-05,
+ "loss": 0.4162,
+ "step": 136100
+ },
+ {
+ "epoch": 0.7024522626547175,
+ "grad_norm": 23390.642578125,
+ "learning_rate": 3.6542984854299936e-05,
+ "loss": 0.4082,
+ "step": 136150
+ },
+ {
+ "epoch": 0.7027102326373303,
+ "grad_norm": 22980.90625,
+ "learning_rate": 3.6507806228478125e-05,
+ "loss": 0.4067,
+ "step": 136200
+ },
+ {
+ "epoch": 0.7029682026199432,
+ "grad_norm": 22321.662109375,
+ "learning_rate": 3.6472634804528095e-05,
+ "loss": 0.4129,
+ "step": 136250
+ },
+ {
+ "epoch": 0.7032261726025559,
+ "grad_norm": 22719.455078125,
+ "learning_rate": 3.643747060122366e-05,
+ "loss": 0.4169,
+ "step": 136300
+ },
+ {
+ "epoch": 0.7034841425851688,
+ "grad_norm": 25283.494140625,
+ "learning_rate": 3.640231363733481e-05,
+ "loss": 0.4081,
+ "step": 136350
+ },
+ {
+ "epoch": 0.7037421125677816,
+ "grad_norm": 24430.919921875,
+ "learning_rate": 3.636716393162764e-05,
+ "loss": 0.4187,
+ "step": 136400
+ },
+ {
+ "epoch": 0.7040000825503945,
+ "grad_norm": 23372.662109375,
+ "learning_rate": 3.633202150286435e-05,
+ "loss": 0.4117,
+ "step": 136450
+ },
+ {
+ "epoch": 0.7042580525330072,
+ "grad_norm": 23912.595703125,
+ "learning_rate": 3.6296886369803346e-05,
+ "loss": 0.4126,
+ "step": 136500
+ },
+ {
+ "epoch": 0.7045160225156201,
+ "grad_norm": 24092.0390625,
+ "learning_rate": 3.626175855119903e-05,
+ "loss": 0.4163,
+ "step": 136550
+ },
+ {
+ "epoch": 0.7047739924982329,
+ "grad_norm": 23452.2421875,
+ "learning_rate": 3.6226638065802e-05,
+ "loss": 0.4088,
+ "step": 136600
+ },
+ {
+ "epoch": 0.7050319624808458,
+ "grad_norm": 24399.787109375,
+ "learning_rate": 3.6191524932358845e-05,
+ "loss": 0.4139,
+ "step": 136650
+ },
+ {
+ "epoch": 0.7052899324634585,
+ "grad_norm": 23295.599609375,
+ "learning_rate": 3.6156419169612287e-05,
+ "loss": 0.4112,
+ "step": 136700
+ },
+ {
+ "epoch": 0.7055479024460714,
+ "grad_norm": 25809.876953125,
+ "learning_rate": 3.6121320796301126e-05,
+ "loss": 0.4141,
+ "step": 136750
+ },
+ {
+ "epoch": 0.7058058724286842,
+ "grad_norm": 21679.818359375,
+ "learning_rate": 3.608622983116018e-05,
+ "loss": 0.4183,
+ "step": 136800
+ },
+ {
+ "epoch": 0.706063842411297,
+ "grad_norm": 24492.578125,
+ "learning_rate": 3.6051146292920334e-05,
+ "loss": 0.4103,
+ "step": 136850
+ },
+ {
+ "epoch": 0.7063218123939099,
+ "grad_norm": 24805.59375,
+ "learning_rate": 3.601607020030847e-05,
+ "loss": 0.4129,
+ "step": 136900
+ },
+ {
+ "epoch": 0.7065797823765226,
+ "grad_norm": 23000.9765625,
+ "learning_rate": 3.5981001572047566e-05,
+ "loss": 0.4091,
+ "step": 136950
+ },
+ {
+ "epoch": 0.7068377523591355,
+ "grad_norm": 24590.6875,
+ "learning_rate": 3.594594042685655e-05,
+ "loss": 0.4061,
+ "step": 137000
+ },
+ {
+ "epoch": 0.7070957223417483,
+ "grad_norm": 22223.16015625,
+ "learning_rate": 3.5910886783450416e-05,
+ "loss": 0.4174,
+ "step": 137050
+ },
+ {
+ "epoch": 0.7073536923243612,
+ "grad_norm": 28207.7578125,
+ "learning_rate": 3.587584066054007e-05,
+ "loss": 0.4119,
+ "step": 137100
+ },
+ {
+ "epoch": 0.7076116623069739,
+ "grad_norm": 23703.271484375,
+ "learning_rate": 3.584080207683249e-05,
+ "loss": 0.4104,
+ "step": 137150
+ },
+ {
+ "epoch": 0.7078696322895868,
+ "grad_norm": 24903.92578125,
+ "learning_rate": 3.580577105103059e-05,
+ "loss": 0.4139,
+ "step": 137200
+ },
+ {
+ "epoch": 0.7081276022721996,
+ "grad_norm": 21130.029296875,
+ "learning_rate": 3.5770747601833235e-05,
+ "loss": 0.4208,
+ "step": 137250
+ },
+ {
+ "epoch": 0.7083855722548125,
+ "grad_norm": 22223.611328125,
+ "learning_rate": 3.5735731747935306e-05,
+ "loss": 0.4118,
+ "step": 137300
+ },
+ {
+ "epoch": 0.7086435422374252,
+ "grad_norm": 21862.12109375,
+ "learning_rate": 3.570072350802753e-05,
+ "loss": 0.4101,
+ "step": 137350
+ },
+ {
+ "epoch": 0.708901512220038,
+ "grad_norm": 22504.25390625,
+ "learning_rate": 3.566572290079667e-05,
+ "loss": 0.4187,
+ "step": 137400
+ },
+ {
+ "epoch": 0.7091594822026509,
+ "grad_norm": 21898.53125,
+ "learning_rate": 3.563072994492535e-05,
+ "loss": 0.4068,
+ "step": 137450
+ },
+ {
+ "epoch": 0.7094174521852638,
+ "grad_norm": 21629.5859375,
+ "learning_rate": 3.559574465909215e-05,
+ "loss": 0.4107,
+ "step": 137500
+ },
+ {
+ "epoch": 0.7096754221678765,
+ "grad_norm": 23078.080078125,
+ "learning_rate": 3.5560767061971515e-05,
+ "loss": 0.4093,
+ "step": 137550
+ },
+ {
+ "epoch": 0.7099333921504893,
+ "grad_norm": 21831.11328125,
+ "learning_rate": 3.5525797172233826e-05,
+ "loss": 0.4083,
+ "step": 137600
+ },
+ {
+ "epoch": 0.7101913621331022,
+ "grad_norm": 20934.220703125,
+ "learning_rate": 3.5490835008545334e-05,
+ "loss": 0.4143,
+ "step": 137650
+ },
+ {
+ "epoch": 0.710449332115715,
+ "grad_norm": 21335.0,
+ "learning_rate": 3.545588058956816e-05,
+ "loss": 0.4104,
+ "step": 137700
+ },
+ {
+ "epoch": 0.7107073020983279,
+ "grad_norm": 20424.279296875,
+ "learning_rate": 3.542093393396031e-05,
+ "loss": 0.4117,
+ "step": 137750
+ },
+ {
+ "epoch": 0.7109652720809406,
+ "grad_norm": 24527.76171875,
+ "learning_rate": 3.5385995060375596e-05,
+ "loss": 0.4128,
+ "step": 137800
+ },
+ {
+ "epoch": 0.7112232420635535,
+ "grad_norm": 23370.17578125,
+ "learning_rate": 3.535106398746376e-05,
+ "loss": 0.4149,
+ "step": 137850
+ },
+ {
+ "epoch": 0.7114812120461663,
+ "grad_norm": 22996.2890625,
+ "learning_rate": 3.531614073387028e-05,
+ "loss": 0.412,
+ "step": 137900
+ },
+ {
+ "epoch": 0.7117391820287792,
+ "grad_norm": 26592.931640625,
+ "learning_rate": 3.528122531823657e-05,
+ "loss": 0.4111,
+ "step": 137950
+ },
+ {
+ "epoch": 0.7119971520113919,
+ "grad_norm": 22353.35546875,
+ "learning_rate": 3.5246317759199745e-05,
+ "loss": 0.412,
+ "step": 138000
+ },
+ {
+ "epoch": 0.7122551219940048,
+ "grad_norm": 22266.91796875,
+ "learning_rate": 3.521141807539281e-05,
+ "loss": 0.4113,
+ "step": 138050
+ },
+ {
+ "epoch": 0.7125130919766176,
+ "grad_norm": 21723.318359375,
+ "learning_rate": 3.517652628544457e-05,
+ "loss": 0.4058,
+ "step": 138100
+ },
+ {
+ "epoch": 0.7127710619592305,
+ "grad_norm": 23738.322265625,
+ "learning_rate": 3.5141642407979535e-05,
+ "loss": 0.4072,
+ "step": 138150
+ },
+ {
+ "epoch": 0.7130290319418432,
+ "grad_norm": 25993.587890625,
+ "learning_rate": 3.5106766461618083e-05,
+ "loss": 0.4066,
+ "step": 138200
+ },
+ {
+ "epoch": 0.713287001924456,
+ "grad_norm": 23321.55859375,
+ "learning_rate": 3.50718984649763e-05,
+ "loss": 0.4104,
+ "step": 138250
+ },
+ {
+ "epoch": 0.7135449719070689,
+ "grad_norm": 22022.267578125,
+ "learning_rate": 3.503703843666605e-05,
+ "loss": 0.4096,
+ "step": 138300
+ },
+ {
+ "epoch": 0.7138029418896817,
+ "grad_norm": 22249.640625,
+ "learning_rate": 3.500218639529493e-05,
+ "loss": 0.4121,
+ "step": 138350
+ },
+ {
+ "epoch": 0.7140609118722946,
+ "grad_norm": 21145.283203125,
+ "learning_rate": 3.496734235946632e-05,
+ "loss": 0.4126,
+ "step": 138400
+ },
+ {
+ "epoch": 0.7143188818549073,
+ "grad_norm": 22439.38671875,
+ "learning_rate": 3.493250634777924e-05,
+ "loss": 0.4076,
+ "step": 138450
+ },
+ {
+ "epoch": 0.7145768518375202,
+ "grad_norm": 25641.93359375,
+ "learning_rate": 3.4897678378828516e-05,
+ "loss": 0.4105,
+ "step": 138500
+ },
+ {
+ "epoch": 0.714834821820133,
+ "grad_norm": 22200.46875,
+ "learning_rate": 3.486285847120465e-05,
+ "loss": 0.4097,
+ "step": 138550
+ },
+ {
+ "epoch": 0.7150927918027459,
+ "grad_norm": 22691.666015625,
+ "learning_rate": 3.482804664349381e-05,
+ "loss": 0.4154,
+ "step": 138600
+ },
+ {
+ "epoch": 0.7153507617853586,
+ "grad_norm": 22139.16796875,
+ "learning_rate": 3.479324291427788e-05,
+ "loss": 0.4124,
+ "step": 138650
+ },
+ {
+ "epoch": 0.7156087317679715,
+ "grad_norm": 23695.7578125,
+ "learning_rate": 3.4758447302134414e-05,
+ "loss": 0.4174,
+ "step": 138700
+ },
+ {
+ "epoch": 0.7158667017505843,
+ "grad_norm": 24720.06640625,
+ "learning_rate": 3.472365982563666e-05,
+ "loss": 0.4095,
+ "step": 138750
+ },
+ {
+ "epoch": 0.7161246717331972,
+ "grad_norm": 22861.171875,
+ "learning_rate": 3.4688880503353474e-05,
+ "loss": 0.4039,
+ "step": 138800
+ },
+ {
+ "epoch": 0.7163826417158099,
+ "grad_norm": 22751.833984375,
+ "learning_rate": 3.465410935384939e-05,
+ "loss": 0.4175,
+ "step": 138850
+ },
+ {
+ "epoch": 0.7166406116984227,
+ "grad_norm": 22689.5,
+ "learning_rate": 3.461934639568457e-05,
+ "loss": 0.4133,
+ "step": 138900
+ },
+ {
+ "epoch": 0.7168985816810356,
+ "grad_norm": 23292.1328125,
+ "learning_rate": 3.458459164741482e-05,
+ "loss": 0.4062,
+ "step": 138950
+ },
+ {
+ "epoch": 0.7171565516636484,
+ "grad_norm": 22390.515625,
+ "learning_rate": 3.4549845127591563e-05,
+ "loss": 0.4169,
+ "step": 139000
+ },
+ {
+ "epoch": 0.7174145216462613,
+ "grad_norm": 23531.9921875,
+ "learning_rate": 3.451510685476178e-05,
+ "loss": 0.4084,
+ "step": 139050
+ },
+ {
+ "epoch": 0.717672491628874,
+ "grad_norm": 23847.154296875,
+ "learning_rate": 3.448037684746812e-05,
+ "loss": 0.4134,
+ "step": 139100
+ },
+ {
+ "epoch": 0.7179304616114869,
+ "grad_norm": 22651.15234375,
+ "learning_rate": 3.4445655124248774e-05,
+ "loss": 0.4118,
+ "step": 139150
+ },
+ {
+ "epoch": 0.7181884315940997,
+ "grad_norm": 21893.123046875,
+ "learning_rate": 3.441094170363755e-05,
+ "loss": 0.4065,
+ "step": 139200
+ },
+ {
+ "epoch": 0.7184464015767126,
+ "grad_norm": 22238.685546875,
+ "learning_rate": 3.4376236604163756e-05,
+ "loss": 0.4164,
+ "step": 139250
+ },
+ {
+ "epoch": 0.7187043715593253,
+ "grad_norm": 25605.083984375,
+ "learning_rate": 3.434153984435234e-05,
+ "loss": 0.4105,
+ "step": 139300
+ },
+ {
+ "epoch": 0.7189623415419382,
+ "grad_norm": 22414.0703125,
+ "learning_rate": 3.430685144272374e-05,
+ "loss": 0.4095,
+ "step": 139350
+ },
+ {
+ "epoch": 0.719220311524551,
+ "grad_norm": 22067.443359375,
+ "learning_rate": 3.4272171417793954e-05,
+ "loss": 0.4105,
+ "step": 139400
+ },
+ {
+ "epoch": 0.7194782815071639,
+ "grad_norm": 22398.36328125,
+ "learning_rate": 3.423749978807454e-05,
+ "loss": 0.4065,
+ "step": 139450
+ },
+ {
+ "epoch": 0.7197362514897766,
+ "grad_norm": 25660.017578125,
+ "learning_rate": 3.420283657207248e-05,
+ "loss": 0.4139,
+ "step": 139500
+ },
+ {
+ "epoch": 0.7199942214723895,
+ "grad_norm": 27245.4609375,
+ "learning_rate": 3.416818178829039e-05,
+ "loss": 0.4106,
+ "step": 139550
+ },
+ {
+ "epoch": 0.7202521914550023,
+ "grad_norm": 22430.6484375,
+ "learning_rate": 3.413353545522628e-05,
+ "loss": 0.4103,
+ "step": 139600
+ },
+ {
+ "epoch": 0.7205101614376151,
+ "grad_norm": 25269.876953125,
+ "learning_rate": 3.409889759137373e-05,
+ "loss": 0.4073,
+ "step": 139650
+ },
+ {
+ "epoch": 0.720768131420228,
+ "grad_norm": 22811.275390625,
+ "learning_rate": 3.406426821522172e-05,
+ "loss": 0.4156,
+ "step": 139700
+ },
+ {
+ "epoch": 0.7210261014028407,
+ "grad_norm": 21838.966796875,
+ "learning_rate": 3.402964734525477e-05,
+ "loss": 0.4132,
+ "step": 139750
+ },
+ {
+ "epoch": 0.7212840713854536,
+ "grad_norm": 22130.935546875,
+ "learning_rate": 3.39950349999528e-05,
+ "loss": 0.418,
+ "step": 139800
+ },
+ {
+ "epoch": 0.7215420413680664,
+ "grad_norm": 22744.779296875,
+ "learning_rate": 3.396043119779123e-05,
+ "loss": 0.4098,
+ "step": 139850
+ },
+ {
+ "epoch": 0.7218000113506793,
+ "grad_norm": 22559.07421875,
+ "learning_rate": 3.392583595724093e-05,
+ "loss": 0.4159,
+ "step": 139900
+ },
+ {
+ "epoch": 0.722057981333292,
+ "grad_norm": 20920.349609375,
+ "learning_rate": 3.3891249296768116e-05,
+ "loss": 0.406,
+ "step": 139950
+ },
+ {
+ "epoch": 0.7223159513159049,
+ "grad_norm": 20708.716796875,
+ "learning_rate": 3.38566712348345e-05,
+ "loss": 0.4102,
+ "step": 140000
+ },
+ {
+ "epoch": 0.7223159513159049,
+ "eval_loss": 0.39852654933929443,
+ "eval_runtime": 3128.1309,
+ "eval_samples_per_second": 991.365,
+ "eval_steps_per_second": 1.936,
+ "step": 140000
+ },
+ {
+ "epoch": 0.7225739212985177,
+ "grad_norm": 24440.734375,
+ "learning_rate": 3.382210178989718e-05,
+ "loss": 0.4144,
+ "step": 140050
+ },
+ {
+ "epoch": 0.7228318912811306,
+ "grad_norm": 22715.88671875,
+ "learning_rate": 3.378754098040867e-05,
+ "loss": 0.4146,
+ "step": 140100
+ },
+ {
+ "epoch": 0.7230898612637433,
+ "grad_norm": 23713.474609375,
+ "learning_rate": 3.375298882481683e-05,
+ "loss": 0.4089,
+ "step": 140150
+ },
+ {
+ "epoch": 0.7233478312463562,
+ "grad_norm": 24705.048828125,
+ "learning_rate": 3.371844534156497e-05,
+ "loss": 0.4052,
+ "step": 140200
+ },
+ {
+ "epoch": 0.723605801228969,
+ "grad_norm": 22624.98046875,
+ "learning_rate": 3.368391054909169e-05,
+ "loss": 0.4155,
+ "step": 140250
+ },
+ {
+ "epoch": 0.7238637712115819,
+ "grad_norm": 24774.72265625,
+ "learning_rate": 3.364938446583103e-05,
+ "loss": 0.4058,
+ "step": 140300
+ },
+ {
+ "epoch": 0.7241217411941946,
+ "grad_norm": 24109.02734375,
+ "learning_rate": 3.361486711021235e-05,
+ "loss": 0.4169,
+ "step": 140350
+ },
+ {
+ "epoch": 0.7243797111768074,
+ "grad_norm": 20315.724609375,
+ "learning_rate": 3.3580358500660284e-05,
+ "loss": 0.4135,
+ "step": 140400
+ },
+ {
+ "epoch": 0.7246376811594203,
+ "grad_norm": 26642.84765625,
+ "learning_rate": 3.3545858655594935e-05,
+ "loss": 0.4182,
+ "step": 140450
+ },
+ {
+ "epoch": 0.7248956511420331,
+ "grad_norm": 23466.93359375,
+ "learning_rate": 3.351136759343161e-05,
+ "loss": 0.4098,
+ "step": 140500
+ },
+ {
+ "epoch": 0.725153621124646,
+ "grad_norm": 25247.11328125,
+ "learning_rate": 3.3476885332580985e-05,
+ "loss": 0.4085,
+ "step": 140550
+ },
+ {
+ "epoch": 0.7254115911072587,
+ "grad_norm": 25220.11328125,
+ "learning_rate": 3.3442411891449e-05,
+ "loss": 0.4139,
+ "step": 140600
+ },
+ {
+ "epoch": 0.7256695610898716,
+ "grad_norm": 21836.095703125,
+ "learning_rate": 3.3407947288436936e-05,
+ "loss": 0.4127,
+ "step": 140650
+ },
+ {
+ "epoch": 0.7259275310724844,
+ "grad_norm": 22301.443359375,
+ "learning_rate": 3.3373491541941346e-05,
+ "loss": 0.4127,
+ "step": 140700
+ },
+ {
+ "epoch": 0.7261855010550973,
+ "grad_norm": 21902.615234375,
+ "learning_rate": 3.333904467035399e-05,
+ "loss": 0.4111,
+ "step": 140750
+ },
+ {
+ "epoch": 0.72644347103771,
+ "grad_norm": 21408.71484375,
+ "learning_rate": 3.3304606692061984e-05,
+ "loss": 0.4095,
+ "step": 140800
+ },
+ {
+ "epoch": 0.7267014410203229,
+ "grad_norm": 26146.03515625,
+ "learning_rate": 3.3270177625447626e-05,
+ "loss": 0.4096,
+ "step": 140850
+ },
+ {
+ "epoch": 0.7269594110029357,
+ "grad_norm": 22772.9921875,
+ "learning_rate": 3.323575748888852e-05,
+ "loss": 0.4109,
+ "step": 140900
+ },
+ {
+ "epoch": 0.7272173809855486,
+ "grad_norm": 24654.810546875,
+ "learning_rate": 3.320134630075742e-05,
+ "loss": 0.4135,
+ "step": 140950
+ },
+ {
+ "epoch": 0.7274753509681613,
+ "grad_norm": 23458.103515625,
+ "learning_rate": 3.31669440794224e-05,
+ "loss": 0.4128,
+ "step": 141000
+ },
+ {
+ "epoch": 0.7277333209507741,
+ "grad_norm": 22455.630859375,
+ "learning_rate": 3.3132550843246654e-05,
+ "loss": 0.411,
+ "step": 141050
+ },
+ {
+ "epoch": 0.727991290933387,
+ "grad_norm": 22372.08203125,
+ "learning_rate": 3.3098166610588655e-05,
+ "loss": 0.413,
+ "step": 141100
+ },
+ {
+ "epoch": 0.7282492609159998,
+ "grad_norm": 22878.216796875,
+ "learning_rate": 3.306379139980206e-05,
+ "loss": 0.4054,
+ "step": 141150
+ },
+ {
+ "epoch": 0.7285072308986127,
+ "grad_norm": 22959.708984375,
+ "learning_rate": 3.302942522923563e-05,
+ "loss": 0.4114,
+ "step": 141200
+ },
+ {
+ "epoch": 0.7287652008812254,
+ "grad_norm": 22574.986328125,
+ "learning_rate": 3.2995068117233417e-05,
+ "loss": 0.4105,
+ "step": 141250
+ },
+ {
+ "epoch": 0.7290231708638383,
+ "grad_norm": 23770.279296875,
+ "learning_rate": 3.2960720082134555e-05,
+ "loss": 0.4091,
+ "step": 141300
+ },
+ {
+ "epoch": 0.7292811408464511,
+ "grad_norm": 23017.416015625,
+ "learning_rate": 3.292638114227338e-05,
+ "loss": 0.411,
+ "step": 141350
+ },
+ {
+ "epoch": 0.729539110829064,
+ "grad_norm": 23605.982421875,
+ "learning_rate": 3.289205131597932e-05,
+ "loss": 0.4097,
+ "step": 141400
+ },
+ {
+ "epoch": 0.7297970808116767,
+ "grad_norm": 22409.12890625,
+ "learning_rate": 3.2857730621577006e-05,
+ "loss": 0.4096,
+ "step": 141450
+ },
+ {
+ "epoch": 0.7300550507942896,
+ "grad_norm": 22681.11328125,
+ "learning_rate": 3.282341907738613e-05,
+ "loss": 0.4066,
+ "step": 141500
+ },
+ {
+ "epoch": 0.7303130207769024,
+ "grad_norm": 27188.859375,
+ "learning_rate": 3.278911670172154e-05,
+ "loss": 0.4104,
+ "step": 141550
+ },
+ {
+ "epoch": 0.7305709907595153,
+ "grad_norm": 25134.85546875,
+ "learning_rate": 3.2754823512893225e-05,
+ "loss": 0.4105,
+ "step": 141600
+ },
+ {
+ "epoch": 0.730828960742128,
+ "grad_norm": 21408.478515625,
+ "learning_rate": 3.2720539529206154e-05,
+ "loss": 0.412,
+ "step": 141650
+ },
+ {
+ "epoch": 0.7310869307247408,
+ "grad_norm": 21062.59375,
+ "learning_rate": 3.26862647689605e-05,
+ "loss": 0.411,
+ "step": 141700
+ },
+ {
+ "epoch": 0.7313449007073537,
+ "grad_norm": 21591.23828125,
+ "learning_rate": 3.265199925045143e-05,
+ "loss": 0.4171,
+ "step": 141750
+ },
+ {
+ "epoch": 0.7316028706899665,
+ "grad_norm": 23328.751953125,
+ "learning_rate": 3.261774299196926e-05,
+ "loss": 0.4127,
+ "step": 141800
+ },
+ {
+ "epoch": 0.7318608406725794,
+ "grad_norm": 27247.59375,
+ "learning_rate": 3.258349601179928e-05,
+ "loss": 0.4087,
+ "step": 141850
+ },
+ {
+ "epoch": 0.7321188106551921,
+ "grad_norm": 24500.822265625,
+ "learning_rate": 3.254925832822188e-05,
+ "loss": 0.4015,
+ "step": 141900
+ },
+ {
+ "epoch": 0.732376780637805,
+ "grad_norm": 25855.849609375,
+ "learning_rate": 3.251502995951247e-05,
+ "loss": 0.4125,
+ "step": 141950
+ },
+ {
+ "epoch": 0.7326347506204178,
+ "grad_norm": 23075.234375,
+ "learning_rate": 3.248081092394148e-05,
+ "loss": 0.4112,
+ "step": 142000
+ },
+ {
+ "epoch": 0.7328927206030307,
+ "grad_norm": 25166.712890625,
+ "learning_rate": 3.2446601239774405e-05,
+ "loss": 0.4121,
+ "step": 142050
+ },
+ {
+ "epoch": 0.7331506905856434,
+ "grad_norm": 23327.337890625,
+ "learning_rate": 3.241240092527167e-05,
+ "loss": 0.41,
+ "step": 142100
+ },
+ {
+ "epoch": 0.7334086605682563,
+ "grad_norm": 34138.34375,
+ "learning_rate": 3.237820999868876e-05,
+ "loss": 0.413,
+ "step": 142150
+ },
+ {
+ "epoch": 0.7336666305508691,
+ "grad_norm": 23031.2109375,
+ "learning_rate": 3.234402847827612e-05,
+ "loss": 0.414,
+ "step": 142200
+ },
+ {
+ "epoch": 0.733924600533482,
+ "grad_norm": 23237.44921875,
+ "learning_rate": 3.230985638227921e-05,
+ "loss": 0.4159,
+ "step": 142250
+ },
+ {
+ "epoch": 0.7341825705160947,
+ "grad_norm": 21437.705078125,
+ "learning_rate": 3.2275693728938395e-05,
+ "loss": 0.4078,
+ "step": 142300
+ },
+ {
+ "epoch": 0.7344405404987076,
+ "grad_norm": 23815.9140625,
+ "learning_rate": 3.224154053648906e-05,
+ "loss": 0.4135,
+ "step": 142350
+ },
+ {
+ "epoch": 0.7346985104813204,
+ "grad_norm": 26809.724609375,
+ "learning_rate": 3.2207396823161514e-05,
+ "loss": 0.409,
+ "step": 142400
+ },
+ {
+ "epoch": 0.7349564804639332,
+ "grad_norm": 21905.6484375,
+ "learning_rate": 3.2173262607181e-05,
+ "loss": 0.41,
+ "step": 142450
+ },
+ {
+ "epoch": 0.735214450446546,
+ "grad_norm": 23628.076171875,
+ "learning_rate": 3.2139137906767743e-05,
+ "loss": 0.4175,
+ "step": 142500
+ },
+ {
+ "epoch": 0.7354724204291588,
+ "grad_norm": 24156.837890625,
+ "learning_rate": 3.210502274013679e-05,
+ "loss": 0.4114,
+ "step": 142550
+ },
+ {
+ "epoch": 0.7357303904117717,
+ "grad_norm": 21517.404296875,
+ "learning_rate": 3.207091712549819e-05,
+ "loss": 0.4112,
+ "step": 142600
+ },
+ {
+ "epoch": 0.7359883603943845,
+ "grad_norm": 22684.734375,
+ "learning_rate": 3.203682108105685e-05,
+ "loss": 0.417,
+ "step": 142650
+ },
+ {
+ "epoch": 0.7362463303769974,
+ "grad_norm": 22205.361328125,
+ "learning_rate": 3.2002734625012585e-05,
+ "loss": 0.4111,
+ "step": 142700
+ },
+ {
+ "epoch": 0.7365043003596101,
+ "grad_norm": 21131.41796875,
+ "learning_rate": 3.196865777556008e-05,
+ "loss": 0.4114,
+ "step": 142750
+ },
+ {
+ "epoch": 0.736762270342223,
+ "grad_norm": 23506.66796875,
+ "learning_rate": 3.1934590550888894e-05,
+ "loss": 0.4183,
+ "step": 142800
+ },
+ {
+ "epoch": 0.7370202403248358,
+ "grad_norm": 24435.4609375,
+ "learning_rate": 3.190053296918345e-05,
+ "loss": 0.4166,
+ "step": 142850
+ },
+ {
+ "epoch": 0.7372782103074487,
+ "grad_norm": 22610.4296875,
+ "learning_rate": 3.186648504862303e-05,
+ "loss": 0.4109,
+ "step": 142900
+ },
+ {
+ "epoch": 0.7375361802900614,
+ "grad_norm": 24722.974609375,
+ "learning_rate": 3.183244680738178e-05,
+ "loss": 0.4093,
+ "step": 142950
+ },
+ {
+ "epoch": 0.7377941502726743,
+ "grad_norm": 23927.6640625,
+ "learning_rate": 3.1798418263628596e-05,
+ "loss": 0.4106,
+ "step": 143000
+ },
+ {
+ "epoch": 0.7380521202552871,
+ "grad_norm": 23958.216796875,
+ "learning_rate": 3.176439943552732e-05,
+ "loss": 0.4067,
+ "step": 143050
+ },
+ {
+ "epoch": 0.7383100902379,
+ "grad_norm": 23272.818359375,
+ "learning_rate": 3.1730390341236496e-05,
+ "loss": 0.4086,
+ "step": 143100
+ },
+ {
+ "epoch": 0.7385680602205127,
+ "grad_norm": 20998.751953125,
+ "learning_rate": 3.1696390998909556e-05,
+ "loss": 0.4099,
+ "step": 143150
+ },
+ {
+ "epoch": 0.7388260302031255,
+ "grad_norm": 24493.677734375,
+ "learning_rate": 3.166240142669464e-05,
+ "loss": 0.413,
+ "step": 143200
+ },
+ {
+ "epoch": 0.7390840001857384,
+ "grad_norm": 22639.927734375,
+ "learning_rate": 3.162842164273479e-05,
+ "loss": 0.4105,
+ "step": 143250
+ },
+ {
+ "epoch": 0.7393419701683512,
+ "grad_norm": 24407.029296875,
+ "learning_rate": 3.15944516651677e-05,
+ "loss": 0.4188,
+ "step": 143300
+ },
+ {
+ "epoch": 0.7395999401509641,
+ "grad_norm": 26538.68359375,
+ "learning_rate": 3.156049151212591e-05,
+ "loss": 0.4135,
+ "step": 143350
+ },
+ {
+ "epoch": 0.7398579101335768,
+ "grad_norm": 24519.060546875,
+ "learning_rate": 3.1526541201736695e-05,
+ "loss": 0.4141,
+ "step": 143400
+ },
+ {
+ "epoch": 0.7401158801161897,
+ "grad_norm": 21236.681640625,
+ "learning_rate": 3.149260075212206e-05,
+ "loss": 0.4096,
+ "step": 143450
+ },
+ {
+ "epoch": 0.7403738500988025,
+ "grad_norm": 24463.015625,
+ "learning_rate": 3.1458670181398796e-05,
+ "loss": 0.4035,
+ "step": 143500
+ },
+ {
+ "epoch": 0.7406318200814154,
+ "grad_norm": 26984.408203125,
+ "learning_rate": 3.1424749507678336e-05,
+ "loss": 0.4115,
+ "step": 143550
+ },
+ {
+ "epoch": 0.7408897900640281,
+ "grad_norm": 25619.35546875,
+ "learning_rate": 3.139083874906691e-05,
+ "loss": 0.4131,
+ "step": 143600
+ },
+ {
+ "epoch": 0.741147760046641,
+ "grad_norm": 24277.7890625,
+ "learning_rate": 3.13569379236654e-05,
+ "loss": 0.3994,
+ "step": 143650
+ },
+ {
+ "epoch": 0.7414057300292538,
+ "grad_norm": 24007.654296875,
+ "learning_rate": 3.1323047049569446e-05,
+ "loss": 0.4091,
+ "step": 143700
+ },
+ {
+ "epoch": 0.7416637000118667,
+ "grad_norm": 21688.703125,
+ "learning_rate": 3.12891661448693e-05,
+ "loss": 0.4176,
+ "step": 143750
+ },
+ {
+ "epoch": 0.7419216699944794,
+ "grad_norm": 22735.900390625,
+ "learning_rate": 3.125529522764995e-05,
+ "loss": 0.4091,
+ "step": 143800
+ },
+ {
+ "epoch": 0.7421796399770922,
+ "grad_norm": 23359.259765625,
+ "learning_rate": 3.122143431599105e-05,
+ "loss": 0.4125,
+ "step": 143850
+ },
+ {
+ "epoch": 0.7424376099597051,
+ "grad_norm": 26637.263671875,
+ "learning_rate": 3.118758342796687e-05,
+ "loss": 0.4137,
+ "step": 143900
+ },
+ {
+ "epoch": 0.7426955799423179,
+ "grad_norm": 24977.3984375,
+ "learning_rate": 3.1153742581646406e-05,
+ "loss": 0.4094,
+ "step": 143950
+ },
+ {
+ "epoch": 0.7429535499249308,
+ "grad_norm": 25850.91796875,
+ "learning_rate": 3.111991179509318e-05,
+ "loss": 0.4092,
+ "step": 144000
+ },
+ {
+ "epoch": 0.7432115199075435,
+ "grad_norm": 22823.0625,
+ "learning_rate": 3.1086091086365474e-05,
+ "loss": 0.4111,
+ "step": 144050
+ },
+ {
+ "epoch": 0.7434694898901564,
+ "grad_norm": 24187.640625,
+ "learning_rate": 3.1052280473516076e-05,
+ "loss": 0.414,
+ "step": 144100
+ },
+ {
+ "epoch": 0.7437274598727692,
+ "grad_norm": 21726.537109375,
+ "learning_rate": 3.101847997459249e-05,
+ "loss": 0.4098,
+ "step": 144150
+ },
+ {
+ "epoch": 0.7439854298553821,
+ "grad_norm": 23075.27734375,
+ "learning_rate": 3.098468960763671e-05,
+ "loss": 0.4084,
+ "step": 144200
+ },
+ {
+ "epoch": 0.7442433998379948,
+ "grad_norm": 24207.529296875,
+ "learning_rate": 3.095090939068541e-05,
+ "loss": 0.4156,
+ "step": 144250
+ },
+ {
+ "epoch": 0.7445013698206077,
+ "grad_norm": 25209.39453125,
+ "learning_rate": 3.091713934176982e-05,
+ "loss": 0.4122,
+ "step": 144300
+ },
+ {
+ "epoch": 0.7447593398032205,
+ "grad_norm": 24308.361328125,
+ "learning_rate": 3.088337947891573e-05,
+ "loss": 0.408,
+ "step": 144350
+ },
+ {
+ "epoch": 0.7450173097858334,
+ "grad_norm": 22416.990234375,
+ "learning_rate": 3.0849629820143517e-05,
+ "loss": 0.4136,
+ "step": 144400
+ },
+ {
+ "epoch": 0.7452752797684461,
+ "grad_norm": 24417.943359375,
+ "learning_rate": 3.081589038346806e-05,
+ "loss": 0.4079,
+ "step": 144450
+ },
+ {
+ "epoch": 0.745533249751059,
+ "grad_norm": 21519.650390625,
+ "learning_rate": 3.078216118689885e-05,
+ "loss": 0.4073,
+ "step": 144500
+ },
+ {
+ "epoch": 0.7457912197336718,
+ "grad_norm": 22821.1796875,
+ "learning_rate": 3.074844224843986e-05,
+ "loss": 0.4058,
+ "step": 144550
+ },
+ {
+ "epoch": 0.7460491897162846,
+ "grad_norm": 22559.86328125,
+ "learning_rate": 3.071473358608963e-05,
+ "loss": 0.413,
+ "step": 144600
+ },
+ {
+ "epoch": 0.7463071596988974,
+ "grad_norm": 25803.658203125,
+ "learning_rate": 3.068103521784115e-05,
+ "loss": 0.4077,
+ "step": 144650
+ },
+ {
+ "epoch": 0.7465651296815102,
+ "grad_norm": 27601.787109375,
+ "learning_rate": 3.0647347161681983e-05,
+ "loss": 0.4057,
+ "step": 144700
+ },
+ {
+ "epoch": 0.7468230996641231,
+ "grad_norm": 21363.67578125,
+ "learning_rate": 3.061366943559417e-05,
+ "loss": 0.4082,
+ "step": 144750
+ },
+ {
+ "epoch": 0.7470810696467359,
+ "grad_norm": 24007.3046875,
+ "learning_rate": 3.058000205755421e-05,
+ "loss": 0.4121,
+ "step": 144800
+ },
+ {
+ "epoch": 0.7473390396293488,
+ "grad_norm": 29940.8125,
+ "learning_rate": 3.054634504553312e-05,
+ "loss": 0.4046,
+ "step": 144850
+ },
+ {
+ "epoch": 0.7475970096119615,
+ "grad_norm": 24161.90234375,
+ "learning_rate": 3.0512698417496334e-05,
+ "loss": 0.4108,
+ "step": 144900
+ },
+ {
+ "epoch": 0.7478549795945744,
+ "grad_norm": 22984.619140625,
+ "learning_rate": 3.0479062191403785e-05,
+ "loss": 0.4158,
+ "step": 144950
+ },
+ {
+ "epoch": 0.7481129495771872,
+ "grad_norm": 26418.95703125,
+ "learning_rate": 3.0445436385209836e-05,
+ "loss": 0.4059,
+ "step": 145000
+ },
+ {
+ "epoch": 0.7481129495771872,
+ "eval_loss": 0.3971329629421234,
+ "eval_runtime": 3201.285,
+ "eval_samples_per_second": 968.711,
+ "eval_steps_per_second": 1.892,
+ "step": 145000
+ },
+ {
+ "epoch": 0.7483709195598001,
+ "grad_norm": 22503.525390625,
+ "learning_rate": 3.0411821016863308e-05,
+ "loss": 0.4048,
+ "step": 145050
+ },
+ {
+ "epoch": 0.7486288895424128,
+ "grad_norm": 23114.525390625,
+ "learning_rate": 3.03782161043074e-05,
+ "loss": 0.4111,
+ "step": 145100
+ },
+ {
+ "epoch": 0.7488868595250256,
+ "grad_norm": 23249.794921875,
+ "learning_rate": 3.0344621665479778e-05,
+ "loss": 0.4093,
+ "step": 145150
+ },
+ {
+ "epoch": 0.7491448295076385,
+ "grad_norm": 23568.833984375,
+ "learning_rate": 3.0311037718312518e-05,
+ "loss": 0.4166,
+ "step": 145200
+ },
+ {
+ "epoch": 0.7494027994902513,
+ "grad_norm": 21794.6015625,
+ "learning_rate": 3.027746428073206e-05,
+ "loss": 0.4122,
+ "step": 145250
+ },
+ {
+ "epoch": 0.7496607694728641,
+ "grad_norm": 23710.212890625,
+ "learning_rate": 3.024390137065929e-05,
+ "loss": 0.4066,
+ "step": 145300
+ },
+ {
+ "epoch": 0.7499187394554769,
+ "grad_norm": 23179.240234375,
+ "learning_rate": 3.0210349006009385e-05,
+ "loss": 0.4127,
+ "step": 145350
+ },
+ {
+ "epoch": 0.7501767094380898,
+ "grad_norm": 25111.078125,
+ "learning_rate": 3.017680720469199e-05,
+ "loss": 0.4128,
+ "step": 145400
+ },
+ {
+ "epoch": 0.7504346794207026,
+ "grad_norm": 24289.095703125,
+ "learning_rate": 3.0143275984611042e-05,
+ "loss": 0.4167,
+ "step": 145450
+ },
+ {
+ "epoch": 0.7506926494033155,
+ "grad_norm": 22695.802734375,
+ "learning_rate": 3.0109755363664893e-05,
+ "loss": 0.4135,
+ "step": 145500
+ },
+ {
+ "epoch": 0.7509506193859282,
+ "grad_norm": 26995.833984375,
+ "learning_rate": 3.0076245359746163e-05,
+ "loss": 0.4057,
+ "step": 145550
+ },
+ {
+ "epoch": 0.7512085893685411,
+ "grad_norm": 21887.568359375,
+ "learning_rate": 3.004274599074185e-05,
+ "loss": 0.4089,
+ "step": 145600
+ },
+ {
+ "epoch": 0.7514665593511539,
+ "grad_norm": 27344.78125,
+ "learning_rate": 3.00092572745333e-05,
+ "loss": 0.4156,
+ "step": 145650
+ },
+ {
+ "epoch": 0.7517245293337668,
+ "grad_norm": 25476.15234375,
+ "learning_rate": 2.9975779228996104e-05,
+ "loss": 0.4113,
+ "step": 145700
+ },
+ {
+ "epoch": 0.7519824993163795,
+ "grad_norm": 24602.64453125,
+ "learning_rate": 2.9942311872000215e-05,
+ "loss": 0.4077,
+ "step": 145750
+ },
+ {
+ "epoch": 0.7522404692989924,
+ "grad_norm": 24124.984375,
+ "learning_rate": 2.990885522140985e-05,
+ "loss": 0.4122,
+ "step": 145800
+ },
+ {
+ "epoch": 0.7524984392816052,
+ "grad_norm": 24945.2109375,
+ "learning_rate": 2.987540929508354e-05,
+ "loss": 0.409,
+ "step": 145850
+ },
+ {
+ "epoch": 0.752756409264218,
+ "grad_norm": 26535.109375,
+ "learning_rate": 2.9841974110874037e-05,
+ "loss": 0.4132,
+ "step": 145900
+ },
+ {
+ "epoch": 0.7530143792468308,
+ "grad_norm": 21566.904296875,
+ "learning_rate": 2.980854968662843e-05,
+ "loss": 0.4073,
+ "step": 145950
+ },
+ {
+ "epoch": 0.7532723492294436,
+ "grad_norm": 22965.73828125,
+ "learning_rate": 2.9775136040188007e-05,
+ "loss": 0.4124,
+ "step": 146000
+ },
+ {
+ "epoch": 0.7535303192120565,
+ "grad_norm": 25307.123046875,
+ "learning_rate": 2.974173318938833e-05,
+ "loss": 0.4134,
+ "step": 146050
+ },
+ {
+ "epoch": 0.7537882891946693,
+ "grad_norm": 22280.431640625,
+ "learning_rate": 2.9708341152059226e-05,
+ "loss": 0.4085,
+ "step": 146100
+ },
+ {
+ "epoch": 0.7540462591772822,
+ "grad_norm": 25268.705078125,
+ "learning_rate": 2.9674959946024662e-05,
+ "loss": 0.4031,
+ "step": 146150
+ },
+ {
+ "epoch": 0.7543042291598949,
+ "grad_norm": 20014.28125,
+ "learning_rate": 2.9641589589102918e-05,
+ "loss": 0.4093,
+ "step": 146200
+ },
+ {
+ "epoch": 0.7545621991425078,
+ "grad_norm": 28430.544921875,
+ "learning_rate": 2.9608230099106427e-05,
+ "loss": 0.4112,
+ "step": 146250
+ },
+ {
+ "epoch": 0.7548201691251206,
+ "grad_norm": 21031.328125,
+ "learning_rate": 2.9574881493841867e-05,
+ "loss": 0.4084,
+ "step": 146300
+ },
+ {
+ "epoch": 0.7550781391077335,
+ "grad_norm": 24118.916015625,
+ "learning_rate": 2.9541543791110032e-05,
+ "loss": 0.4152,
+ "step": 146350
+ },
+ {
+ "epoch": 0.7553361090903462,
+ "grad_norm": 20663.740234375,
+ "learning_rate": 2.950821700870598e-05,
+ "loss": 0.409,
+ "step": 146400
+ },
+ {
+ "epoch": 0.7555940790729591,
+ "grad_norm": 23081.328125,
+ "learning_rate": 2.9474901164418877e-05,
+ "loss": 0.4089,
+ "step": 146450
+ },
+ {
+ "epoch": 0.7558520490555719,
+ "grad_norm": 24167.80859375,
+ "learning_rate": 2.9441596276032085e-05,
+ "loss": 0.4096,
+ "step": 146500
+ },
+ {
+ "epoch": 0.7561100190381848,
+ "grad_norm": 24959.595703125,
+ "learning_rate": 2.940830236132313e-05,
+ "loss": 0.4109,
+ "step": 146550
+ },
+ {
+ "epoch": 0.7563679890207975,
+ "grad_norm": 22731.36328125,
+ "learning_rate": 2.9375019438063622e-05,
+ "loss": 0.41,
+ "step": 146600
+ },
+ {
+ "epoch": 0.7566259590034103,
+ "grad_norm": 24127.41015625,
+ "learning_rate": 2.9341747524019368e-05,
+ "loss": 0.4078,
+ "step": 146650
+ },
+ {
+ "epoch": 0.7568839289860232,
+ "grad_norm": 27476.791015625,
+ "learning_rate": 2.9308486636950254e-05,
+ "loss": 0.4063,
+ "step": 146700
+ },
+ {
+ "epoch": 0.757141898968636,
+ "grad_norm": 24664.61328125,
+ "learning_rate": 2.9275236794610328e-05,
+ "loss": 0.4086,
+ "step": 146750
+ },
+ {
+ "epoch": 0.7573998689512488,
+ "grad_norm": 23883.0625,
+ "learning_rate": 2.9241998014747664e-05,
+ "loss": 0.4046,
+ "step": 146800
+ },
+ {
+ "epoch": 0.7576578389338616,
+ "grad_norm": 23431.509765625,
+ "learning_rate": 2.9208770315104515e-05,
+ "loss": 0.4054,
+ "step": 146850
+ },
+ {
+ "epoch": 0.7579158089164745,
+ "grad_norm": 25177.9453125,
+ "learning_rate": 2.9175553713417176e-05,
+ "loss": 0.4094,
+ "step": 146900
+ },
+ {
+ "epoch": 0.7581737788990873,
+ "grad_norm": 22862.201171875,
+ "learning_rate": 2.9142348227416e-05,
+ "loss": 0.4073,
+ "step": 146950
+ },
+ {
+ "epoch": 0.7584317488817002,
+ "grad_norm": 21731.240234375,
+ "learning_rate": 2.9109153874825478e-05,
+ "loss": 0.4081,
+ "step": 147000
+ },
+ {
+ "epoch": 0.7586897188643129,
+ "grad_norm": 24952.87109375,
+ "learning_rate": 2.9075970673364083e-05,
+ "loss": 0.4092,
+ "step": 147050
+ },
+ {
+ "epoch": 0.7589476888469258,
+ "grad_norm": 23138.029296875,
+ "learning_rate": 2.9042798640744385e-05,
+ "loss": 0.4051,
+ "step": 147100
+ },
+ {
+ "epoch": 0.7592056588295386,
+ "grad_norm": 21496.501953125,
+ "learning_rate": 2.900963779467295e-05,
+ "loss": 0.4096,
+ "step": 147150
+ },
+ {
+ "epoch": 0.7594636288121515,
+ "grad_norm": 22243.36328125,
+ "learning_rate": 2.8976488152850406e-05,
+ "loss": 0.3985,
+ "step": 147200
+ },
+ {
+ "epoch": 0.7597215987947642,
+ "grad_norm": 24515.029296875,
+ "learning_rate": 2.894334973297137e-05,
+ "loss": 0.4043,
+ "step": 147250
+ },
+ {
+ "epoch": 0.759979568777377,
+ "grad_norm": 23431.802734375,
+ "learning_rate": 2.8910222552724553e-05,
+ "loss": 0.4092,
+ "step": 147300
+ },
+ {
+ "epoch": 0.7602375387599899,
+ "grad_norm": 24192.44140625,
+ "learning_rate": 2.8877106629792515e-05,
+ "loss": 0.413,
+ "step": 147350
+ },
+ {
+ "epoch": 0.7604955087426027,
+ "grad_norm": 24239.015625,
+ "learning_rate": 2.884400198185196e-05,
+ "loss": 0.4064,
+ "step": 147400
+ },
+ {
+ "epoch": 0.7607534787252155,
+ "grad_norm": 22407.27734375,
+ "learning_rate": 2.881090862657348e-05,
+ "loss": 0.4086,
+ "step": 147450
+ },
+ {
+ "epoch": 0.7610114487078283,
+ "grad_norm": 24915.517578125,
+ "learning_rate": 2.877782658162166e-05,
+ "loss": 0.4067,
+ "step": 147500
+ },
+ {
+ "epoch": 0.7612694186904412,
+ "grad_norm": 23721.33984375,
+ "learning_rate": 2.8744755864655098e-05,
+ "loss": 0.4078,
+ "step": 147550
+ },
+ {
+ "epoch": 0.761527388673054,
+ "grad_norm": 23041.625,
+ "learning_rate": 2.8711696493326233e-05,
+ "loss": 0.4092,
+ "step": 147600
+ },
+ {
+ "epoch": 0.7617853586556669,
+ "grad_norm": 24021.81640625,
+ "learning_rate": 2.867864848528158e-05,
+ "loss": 0.4116,
+ "step": 147650
+ },
+ {
+ "epoch": 0.7620433286382796,
+ "grad_norm": 21309.7890625,
+ "learning_rate": 2.8645611858161502e-05,
+ "loss": 0.414,
+ "step": 147700
+ },
+ {
+ "epoch": 0.7623012986208925,
+ "grad_norm": 21959.544921875,
+ "learning_rate": 2.8612586629600307e-05,
+ "loss": 0.4113,
+ "step": 147750
+ },
+ {
+ "epoch": 0.7625592686035053,
+ "grad_norm": 22090.75,
+ "learning_rate": 2.857957281722623e-05,
+ "loss": 0.41,
+ "step": 147800
+ },
+ {
+ "epoch": 0.7628172385861182,
+ "grad_norm": 21273.6640625,
+ "learning_rate": 2.854657043866138e-05,
+ "loss": 0.4043,
+ "step": 147850
+ },
+ {
+ "epoch": 0.7630752085687309,
+ "grad_norm": 22781.33984375,
+ "learning_rate": 2.8513579511521825e-05,
+ "loss": 0.4009,
+ "step": 147900
+ },
+ {
+ "epoch": 0.7633331785513437,
+ "grad_norm": 24383.95703125,
+ "learning_rate": 2.8480600053417472e-05,
+ "loss": 0.4077,
+ "step": 147950
+ },
+ {
+ "epoch": 0.7635911485339566,
+ "grad_norm": 23988.673828125,
+ "learning_rate": 2.8447632081952104e-05,
+ "loss": 0.4048,
+ "step": 148000
+ },
+ {
+ "epoch": 0.7638491185165694,
+ "grad_norm": 24593.1484375,
+ "learning_rate": 2.8414675614723397e-05,
+ "loss": 0.4145,
+ "step": 148050
+ },
+ {
+ "epoch": 0.7641070884991822,
+ "grad_norm": 25818.216796875,
+ "learning_rate": 2.838173066932287e-05,
+ "loss": 0.408,
+ "step": 148100
+ },
+ {
+ "epoch": 0.764365058481795,
+ "grad_norm": 25780.1796875,
+ "learning_rate": 2.8348797263335886e-05,
+ "loss": 0.4109,
+ "step": 148150
+ },
+ {
+ "epoch": 0.7646230284644079,
+ "grad_norm": 22835.51171875,
+ "learning_rate": 2.8315875414341687e-05,
+ "loss": 0.4037,
+ "step": 148200
+ },
+ {
+ "epoch": 0.7648809984470207,
+ "grad_norm": 22711.501953125,
+ "learning_rate": 2.8282965139913296e-05,
+ "loss": 0.4117,
+ "step": 148250
+ },
+ {
+ "epoch": 0.7651389684296336,
+ "grad_norm": 22654.080078125,
+ "learning_rate": 2.825006645761758e-05,
+ "loss": 0.4094,
+ "step": 148300
+ },
+ {
+ "epoch": 0.7653969384122463,
+ "grad_norm": 23406.8671875,
+ "learning_rate": 2.821717938501526e-05,
+ "loss": 0.4096,
+ "step": 148350
+ },
+ {
+ "epoch": 0.7656549083948592,
+ "grad_norm": 23591.68359375,
+ "learning_rate": 2.8184303939660745e-05,
+ "loss": 0.4087,
+ "step": 148400
+ },
+ {
+ "epoch": 0.765912878377472,
+ "grad_norm": 21550.94140625,
+ "learning_rate": 2.815144013910237e-05,
+ "loss": 0.4046,
+ "step": 148450
+ },
+ {
+ "epoch": 0.7661708483600849,
+ "grad_norm": 23503.48046875,
+ "learning_rate": 2.8118588000882177e-05,
+ "loss": 0.4116,
+ "step": 148500
+ },
+ {
+ "epoch": 0.7664288183426976,
+ "grad_norm": 25247.244140625,
+ "learning_rate": 2.8085747542536e-05,
+ "loss": 0.4023,
+ "step": 148550
+ },
+ {
+ "epoch": 0.7666867883253105,
+ "grad_norm": 23665.91796875,
+ "learning_rate": 2.805291878159344e-05,
+ "loss": 0.4117,
+ "step": 148600
+ },
+ {
+ "epoch": 0.7669447583079233,
+ "grad_norm": 22785.59765625,
+ "learning_rate": 2.8020101735577837e-05,
+ "loss": 0.4084,
+ "step": 148650
+ },
+ {
+ "epoch": 0.7672027282905362,
+ "grad_norm": 20447.72265625,
+ "learning_rate": 2.7987296422006327e-05,
+ "loss": 0.4091,
+ "step": 148700
+ },
+ {
+ "epoch": 0.7674606982731489,
+ "grad_norm": 24965.869140625,
+ "learning_rate": 2.795450285838974e-05,
+ "loss": 0.4067,
+ "step": 148750
+ },
+ {
+ "epoch": 0.7677186682557617,
+ "grad_norm": 24323.09765625,
+ "learning_rate": 2.7921721062232637e-05,
+ "loss": 0.4037,
+ "step": 148800
+ },
+ {
+ "epoch": 0.7679766382383746,
+ "grad_norm": 23956.177734375,
+ "learning_rate": 2.7888951051033314e-05,
+ "loss": 0.4079,
+ "step": 148850
+ },
+ {
+ "epoch": 0.7682346082209874,
+ "grad_norm": 24222.4140625,
+ "learning_rate": 2.7856192842283756e-05,
+ "loss": 0.4112,
+ "step": 148900
+ },
+ {
+ "epoch": 0.7684925782036002,
+ "grad_norm": 24444.046875,
+ "learning_rate": 2.782344645346966e-05,
+ "loss": 0.4148,
+ "step": 148950
+ },
+ {
+ "epoch": 0.768750548186213,
+ "grad_norm": 23160.578125,
+ "learning_rate": 2.779071190207046e-05,
+ "loss": 0.4063,
+ "step": 149000
+ },
+ {
+ "epoch": 0.7690085181688259,
+ "grad_norm": 25806.732421875,
+ "learning_rate": 2.7757989205559142e-05,
+ "loss": 0.4112,
+ "step": 149050
+ },
+ {
+ "epoch": 0.7692664881514387,
+ "grad_norm": 21389.734375,
+ "learning_rate": 2.7725278381402524e-05,
+ "loss": 0.4104,
+ "step": 149100
+ },
+ {
+ "epoch": 0.7695244581340516,
+ "grad_norm": 23550.23828125,
+ "learning_rate": 2.769257944706098e-05,
+ "loss": 0.4121,
+ "step": 149150
+ },
+ {
+ "epoch": 0.7697824281166643,
+ "grad_norm": 21442.373046875,
+ "learning_rate": 2.765989241998854e-05,
+ "loss": 0.4087,
+ "step": 149200
+ },
+ {
+ "epoch": 0.7700403980992772,
+ "grad_norm": 23958.978515625,
+ "learning_rate": 2.7627217317632993e-05,
+ "loss": 0.4136,
+ "step": 149250
+ },
+ {
+ "epoch": 0.77029836808189,
+ "grad_norm": 22143.07421875,
+ "learning_rate": 2.759455415743556e-05,
+ "loss": 0.41,
+ "step": 149300
+ },
+ {
+ "epoch": 0.7705563380645029,
+ "grad_norm": 22873.86328125,
+ "learning_rate": 2.7561902956831294e-05,
+ "loss": 0.4094,
+ "step": 149350
+ },
+ {
+ "epoch": 0.7708143080471156,
+ "grad_norm": 22419.3046875,
+ "learning_rate": 2.7529263733248734e-05,
+ "loss": 0.4133,
+ "step": 149400
+ },
+ {
+ "epoch": 0.7710722780297284,
+ "grad_norm": 22167.474609375,
+ "learning_rate": 2.7496636504110075e-05,
+ "loss": 0.4181,
+ "step": 149450
+ },
+ {
+ "epoch": 0.7713302480123413,
+ "grad_norm": 25449.96875,
+ "learning_rate": 2.74640212868311e-05,
+ "loss": 0.412,
+ "step": 149500
+ },
+ {
+ "epoch": 0.7715882179949541,
+ "grad_norm": 22876.767578125,
+ "learning_rate": 2.7431418098821154e-05,
+ "loss": 0.4087,
+ "step": 149550
+ },
+ {
+ "epoch": 0.7718461879775669,
+ "grad_norm": 25600.65625,
+ "learning_rate": 2.7398826957483235e-05,
+ "loss": 0.4133,
+ "step": 149600
+ },
+ {
+ "epoch": 0.7721041579601797,
+ "grad_norm": 21764.0,
+ "learning_rate": 2.7366247880213834e-05,
+ "loss": 0.4073,
+ "step": 149650
+ },
+ {
+ "epoch": 0.7723621279427926,
+ "grad_norm": 21836.0625,
+ "learning_rate": 2.7333680884403046e-05,
+ "loss": 0.4165,
+ "step": 149700
+ },
+ {
+ "epoch": 0.7726200979254054,
+ "grad_norm": 22049.466796875,
+ "learning_rate": 2.7301125987434496e-05,
+ "loss": 0.4104,
+ "step": 149750
+ },
+ {
+ "epoch": 0.7728780679080183,
+ "grad_norm": 25398.28515625,
+ "learning_rate": 2.7268583206685348e-05,
+ "loss": 0.4036,
+ "step": 149800
+ },
+ {
+ "epoch": 0.773136037890631,
+ "grad_norm": 22303.654296875,
+ "learning_rate": 2.72360525595263e-05,
+ "loss": 0.4077,
+ "step": 149850
+ },
+ {
+ "epoch": 0.7733940078732439,
+ "grad_norm": 24734.65234375,
+ "learning_rate": 2.7203534063321633e-05,
+ "loss": 0.409,
+ "step": 149900
+ },
+ {
+ "epoch": 0.7736519778558567,
+ "grad_norm": 22068.283203125,
+ "learning_rate": 2.7171027735429023e-05,
+ "loss": 0.4148,
+ "step": 149950
+ },
+ {
+ "epoch": 0.7739099478384696,
+ "grad_norm": 23250.4921875,
+ "learning_rate": 2.7138533593199766e-05,
+ "loss": 0.4062,
+ "step": 150000
+ },
+ {
+ "epoch": 0.7739099478384696,
+ "eval_loss": 0.3953176736831665,
+ "eval_runtime": 3196.6561,
+ "eval_samples_per_second": 970.114,
+ "eval_steps_per_second": 1.895,
+ "step": 150000
+ },
+ {
+ "epoch": 0.7741679178210823,
+ "grad_norm": 26452.75390625,
+ "learning_rate": 2.710605165397859e-05,
+ "loss": 0.4098,
+ "step": 150050
+ },
+ {
+ "epoch": 0.7744258878036951,
+ "grad_norm": 23934.783203125,
+ "learning_rate": 2.707358193510371e-05,
+ "loss": 0.4113,
+ "step": 150100
+ },
+ {
+ "epoch": 0.774683857786308,
+ "grad_norm": 22443.591796875,
+ "learning_rate": 2.7041124453906884e-05,
+ "loss": 0.4119,
+ "step": 150150
+ },
+ {
+ "epoch": 0.7749418277689208,
+ "grad_norm": 23333.529296875,
+ "learning_rate": 2.7008679227713214e-05,
+ "loss": 0.4029,
+ "step": 150200
+ },
+ {
+ "epoch": 0.7751997977515336,
+ "grad_norm": 22431.576171875,
+ "learning_rate": 2.6976246273841388e-05,
+ "loss": 0.4045,
+ "step": 150250
+ },
+ {
+ "epoch": 0.7754577677341464,
+ "grad_norm": 26959.68359375,
+ "learning_rate": 2.694382560960348e-05,
+ "loss": 0.4072,
+ "step": 150300
+ },
+ {
+ "epoch": 0.7757157377167593,
+ "grad_norm": 21064.66015625,
+ "learning_rate": 2.6911417252304994e-05,
+ "loss": 0.411,
+ "step": 150350
+ },
+ {
+ "epoch": 0.7759737076993721,
+ "grad_norm": 23242.583984375,
+ "learning_rate": 2.6879021219244906e-05,
+ "loss": 0.4075,
+ "step": 150400
+ },
+ {
+ "epoch": 0.776231677681985,
+ "grad_norm": 24738.037109375,
+ "learning_rate": 2.6846637527715546e-05,
+ "loss": 0.4069,
+ "step": 150450
+ },
+ {
+ "epoch": 0.7764896476645977,
+ "grad_norm": 23944.759765625,
+ "learning_rate": 2.681426619500277e-05,
+ "loss": 0.403,
+ "step": 150500
+ },
+ {
+ "epoch": 0.7767476176472106,
+ "grad_norm": 22064.611328125,
+ "learning_rate": 2.678190723838572e-05,
+ "loss": 0.4045,
+ "step": 150550
+ },
+ {
+ "epoch": 0.7770055876298234,
+ "grad_norm": 24025.298828125,
+ "learning_rate": 2.6749560675137002e-05,
+ "loss": 0.4087,
+ "step": 150600
+ },
+ {
+ "epoch": 0.7772635576124363,
+ "grad_norm": 20863.119140625,
+ "learning_rate": 2.6717226522522553e-05,
+ "loss": 0.4087,
+ "step": 150650
+ },
+ {
+ "epoch": 0.777521527595049,
+ "grad_norm": 24537.642578125,
+ "learning_rate": 2.668490479780179e-05,
+ "loss": 0.4127,
+ "step": 150700
+ },
+ {
+ "epoch": 0.7777794975776618,
+ "grad_norm": 24400.193359375,
+ "learning_rate": 2.665259551822733e-05,
+ "loss": 0.4066,
+ "step": 150750
+ },
+ {
+ "epoch": 0.7780374675602747,
+ "grad_norm": 25251.81640625,
+ "learning_rate": 2.6620298701045322e-05,
+ "loss": 0.4111,
+ "step": 150800
+ },
+ {
+ "epoch": 0.7782954375428875,
+ "grad_norm": 23078.0,
+ "learning_rate": 2.658801436349511e-05,
+ "loss": 0.4109,
+ "step": 150850
+ },
+ {
+ "epoch": 0.7785534075255003,
+ "grad_norm": 20437.556640625,
+ "learning_rate": 2.655574252280949e-05,
+ "loss": 0.4096,
+ "step": 150900
+ },
+ {
+ "epoch": 0.7788113775081131,
+ "grad_norm": 24091.796875,
+ "learning_rate": 2.652348319621457e-05,
+ "loss": 0.4097,
+ "step": 150950
+ },
+ {
+ "epoch": 0.779069347490726,
+ "grad_norm": 22893.6640625,
+ "learning_rate": 2.6491236400929686e-05,
+ "loss": 0.4093,
+ "step": 151000
+ },
+ {
+ "epoch": 0.7793273174733388,
+ "grad_norm": 22871.80859375,
+ "learning_rate": 2.645900215416761e-05,
+ "loss": 0.407,
+ "step": 151050
+ },
+ {
+ "epoch": 0.7795852874559517,
+ "grad_norm": 21766.30078125,
+ "learning_rate": 2.642678047313435e-05,
+ "loss": 0.4071,
+ "step": 151100
+ },
+ {
+ "epoch": 0.7798432574385644,
+ "grad_norm": 24945.544921875,
+ "learning_rate": 2.639457137502919e-05,
+ "loss": 0.4073,
+ "step": 151150
+ },
+ {
+ "epoch": 0.7801012274211773,
+ "grad_norm": 22374.009765625,
+ "learning_rate": 2.636237487704475e-05,
+ "loss": 0.409,
+ "step": 151200
+ },
+ {
+ "epoch": 0.7803591974037901,
+ "grad_norm": 23499.08984375,
+ "learning_rate": 2.6330190996366875e-05,
+ "loss": 0.4087,
+ "step": 151250
+ },
+ {
+ "epoch": 0.780617167386403,
+ "grad_norm": 24672.017578125,
+ "learning_rate": 2.629801975017469e-05,
+ "loss": 0.4075,
+ "step": 151300
+ },
+ {
+ "epoch": 0.7808751373690157,
+ "grad_norm": 23105.05078125,
+ "learning_rate": 2.6265861155640626e-05,
+ "loss": 0.4031,
+ "step": 151350
+ },
+ {
+ "epoch": 0.7811331073516286,
+ "grad_norm": 23226.171875,
+ "learning_rate": 2.6233715229930282e-05,
+ "loss": 0.4137,
+ "step": 151400
+ },
+ {
+ "epoch": 0.7813910773342414,
+ "grad_norm": 24494.732421875,
+ "learning_rate": 2.620158199020255e-05,
+ "loss": 0.4089,
+ "step": 151450
+ },
+ {
+ "epoch": 0.7816490473168543,
+ "grad_norm": 24024.236328125,
+ "learning_rate": 2.616946145360952e-05,
+ "loss": 0.4084,
+ "step": 151500
+ },
+ {
+ "epoch": 0.781907017299467,
+ "grad_norm": 21957.2265625,
+ "learning_rate": 2.613735363729649e-05,
+ "loss": 0.4079,
+ "step": 151550
+ },
+ {
+ "epoch": 0.7821649872820798,
+ "grad_norm": 22637.291015625,
+ "learning_rate": 2.6105258558402056e-05,
+ "loss": 0.4093,
+ "step": 151600
+ },
+ {
+ "epoch": 0.7824229572646927,
+ "grad_norm": 27436.56640625,
+ "learning_rate": 2.607317623405787e-05,
+ "loss": 0.4054,
+ "step": 151650
+ },
+ {
+ "epoch": 0.7826809272473055,
+ "grad_norm": 21909.509765625,
+ "learning_rate": 2.6041106681388922e-05,
+ "loss": 0.4052,
+ "step": 151700
+ },
+ {
+ "epoch": 0.7829388972299183,
+ "grad_norm": 22887.494140625,
+ "learning_rate": 2.6009049917513283e-05,
+ "loss": 0.408,
+ "step": 151750
+ },
+ {
+ "epoch": 0.7831968672125311,
+ "grad_norm": 20771.53125,
+ "learning_rate": 2.5977005959542222e-05,
+ "loss": 0.4052,
+ "step": 151800
+ },
+ {
+ "epoch": 0.783454837195144,
+ "grad_norm": 22012.322265625,
+ "learning_rate": 2.5944974824580244e-05,
+ "loss": 0.4053,
+ "step": 151850
+ },
+ {
+ "epoch": 0.7837128071777568,
+ "grad_norm": 25365.822265625,
+ "learning_rate": 2.5912956529724865e-05,
+ "loss": 0.4141,
+ "step": 151900
+ },
+ {
+ "epoch": 0.7839707771603697,
+ "grad_norm": 23211.658203125,
+ "learning_rate": 2.5880951092066885e-05,
+ "loss": 0.4094,
+ "step": 151950
+ },
+ {
+ "epoch": 0.7842287471429824,
+ "grad_norm": 21514.79296875,
+ "learning_rate": 2.584895852869018e-05,
+ "loss": 0.4056,
+ "step": 152000
+ },
+ {
+ "epoch": 0.7844867171255953,
+ "grad_norm": 23275.76953125,
+ "learning_rate": 2.581697885667176e-05,
+ "loss": 0.4076,
+ "step": 152050
+ },
+ {
+ "epoch": 0.7847446871082081,
+ "grad_norm": 24080.478515625,
+ "learning_rate": 2.578501209308174e-05,
+ "loss": 0.409,
+ "step": 152100
+ },
+ {
+ "epoch": 0.785002657090821,
+ "grad_norm": 23384.275390625,
+ "learning_rate": 2.5753058254983376e-05,
+ "loss": 0.4063,
+ "step": 152150
+ },
+ {
+ "epoch": 0.7852606270734337,
+ "grad_norm": 22736.451171875,
+ "learning_rate": 2.572111735943298e-05,
+ "loss": 0.4054,
+ "step": 152200
+ },
+ {
+ "epoch": 0.7855185970560465,
+ "grad_norm": 24730.462890625,
+ "learning_rate": 2.568918942348002e-05,
+ "loss": 0.4074,
+ "step": 152250
+ },
+ {
+ "epoch": 0.7857765670386594,
+ "grad_norm": 23020.759765625,
+ "learning_rate": 2.5657274464166996e-05,
+ "loss": 0.4143,
+ "step": 152300
+ },
+ {
+ "epoch": 0.7860345370212722,
+ "grad_norm": 22263.357421875,
+ "learning_rate": 2.56253724985295e-05,
+ "loss": 0.4075,
+ "step": 152350
+ },
+ {
+ "epoch": 0.786292507003885,
+ "grad_norm": 23515.408203125,
+ "learning_rate": 2.5593483543596165e-05,
+ "loss": 0.4055,
+ "step": 152400
+ },
+ {
+ "epoch": 0.7865504769864978,
+ "grad_norm": 21960.447265625,
+ "learning_rate": 2.55616076163887e-05,
+ "loss": 0.407,
+ "step": 152450
+ },
+ {
+ "epoch": 0.7868084469691107,
+ "grad_norm": 26880.94140625,
+ "learning_rate": 2.55297447339219e-05,
+ "loss": 0.4029,
+ "step": 152500
+ },
+ {
+ "epoch": 0.7870664169517235,
+ "grad_norm": 22276.259765625,
+ "learning_rate": 2.5497894913203492e-05,
+ "loss": 0.4038,
+ "step": 152550
+ },
+ {
+ "epoch": 0.7873243869343364,
+ "grad_norm": 22566.541015625,
+ "learning_rate": 2.5466058171234336e-05,
+ "loss": 0.4055,
+ "step": 152600
+ },
+ {
+ "epoch": 0.7875823569169491,
+ "grad_norm": 24620.486328125,
+ "learning_rate": 2.543423452500826e-05,
+ "loss": 0.4031,
+ "step": 152650
+ },
+ {
+ "epoch": 0.787840326899562,
+ "grad_norm": 24162.99609375,
+ "learning_rate": 2.540242399151208e-05,
+ "loss": 0.4075,
+ "step": 152700
+ },
+ {
+ "epoch": 0.7880982968821748,
+ "grad_norm": 25309.958984375,
+ "learning_rate": 2.537062658772572e-05,
+ "loss": 0.4052,
+ "step": 152750
+ },
+ {
+ "epoch": 0.7883562668647877,
+ "grad_norm": 22024.390625,
+ "learning_rate": 2.533884233062192e-05,
+ "loss": 0.4036,
+ "step": 152800
+ },
+ {
+ "epoch": 0.7886142368474004,
+ "grad_norm": 22356.041015625,
+ "learning_rate": 2.530707123716657e-05,
+ "loss": 0.4065,
+ "step": 152850
+ },
+ {
+ "epoch": 0.7888722068300132,
+ "grad_norm": 22957.642578125,
+ "learning_rate": 2.527531332431844e-05,
+ "loss": 0.403,
+ "step": 152900
+ },
+ {
+ "epoch": 0.7891301768126261,
+ "grad_norm": 22161.298828125,
+ "learning_rate": 2.52435686090293e-05,
+ "loss": 0.4046,
+ "step": 152950
+ },
+ {
+ "epoch": 0.7893881467952389,
+ "grad_norm": 22849.720703125,
+ "learning_rate": 2.5211837108243847e-05,
+ "loss": 0.4045,
+ "step": 153000
+ },
+ {
+ "epoch": 0.7896461167778517,
+ "grad_norm": 25891.248046875,
+ "learning_rate": 2.5180118838899756e-05,
+ "loss": 0.4083,
+ "step": 153050
+ },
+ {
+ "epoch": 0.7899040867604645,
+ "grad_norm": 23150.634765625,
+ "learning_rate": 2.5148413817927598e-05,
+ "loss": 0.4104,
+ "step": 153100
+ },
+ {
+ "epoch": 0.7901620567430774,
+ "grad_norm": 23457.515625,
+ "learning_rate": 2.511672206225094e-05,
+ "loss": 0.4101,
+ "step": 153150
+ },
+ {
+ "epoch": 0.7904200267256902,
+ "grad_norm": 21316.8828125,
+ "learning_rate": 2.508504358878621e-05,
+ "loss": 0.4091,
+ "step": 153200
+ },
+ {
+ "epoch": 0.7906779967083031,
+ "grad_norm": 25747.87109375,
+ "learning_rate": 2.5053378414442748e-05,
+ "loss": 0.4131,
+ "step": 153250
+ },
+ {
+ "epoch": 0.7909359666909158,
+ "grad_norm": 21499.56640625,
+ "learning_rate": 2.502172655612286e-05,
+ "loss": 0.4028,
+ "step": 153300
+ },
+ {
+ "epoch": 0.7911939366735287,
+ "grad_norm": 22949.970703125,
+ "learning_rate": 2.499008803072162e-05,
+ "loss": 0.4078,
+ "step": 153350
+ },
+ {
+ "epoch": 0.7914519066561415,
+ "grad_norm": 26207.181640625,
+ "learning_rate": 2.495846285512714e-05,
+ "loss": 0.4064,
+ "step": 153400
+ },
+ {
+ "epoch": 0.7917098766387544,
+ "grad_norm": 25037.625,
+ "learning_rate": 2.4926851046220246e-05,
+ "loss": 0.4067,
+ "step": 153450
+ },
+ {
+ "epoch": 0.7919678466213671,
+ "grad_norm": 24114.482421875,
+ "learning_rate": 2.4895252620874775e-05,
+ "loss": 0.4123,
+ "step": 153500
+ },
+ {
+ "epoch": 0.79222581660398,
+ "grad_norm": 24953.568359375,
+ "learning_rate": 2.4863667595957325e-05,
+ "loss": 0.4083,
+ "step": 153550
+ },
+ {
+ "epoch": 0.7924837865865928,
+ "grad_norm": 24928.2265625,
+ "learning_rate": 2.483209598832736e-05,
+ "loss": 0.4066,
+ "step": 153600
+ },
+ {
+ "epoch": 0.7927417565692056,
+ "grad_norm": 24045.166015625,
+ "learning_rate": 2.4800537814837227e-05,
+ "loss": 0.4056,
+ "step": 153650
+ },
+ {
+ "epoch": 0.7929997265518184,
+ "grad_norm": 24591.826171875,
+ "learning_rate": 2.476899309233205e-05,
+ "loss": 0.4094,
+ "step": 153700
+ },
+ {
+ "epoch": 0.7932576965344312,
+ "grad_norm": 23336.810546875,
+ "learning_rate": 2.4737461837649782e-05,
+ "loss": 0.41,
+ "step": 153750
+ },
+ {
+ "epoch": 0.7935156665170441,
+ "grad_norm": 23454.171875,
+ "learning_rate": 2.4705944067621216e-05,
+ "loss": 0.4068,
+ "step": 153800
+ },
+ {
+ "epoch": 0.7937736364996569,
+ "grad_norm": 25322.201171875,
+ "learning_rate": 2.467443979906991e-05,
+ "loss": 0.4097,
+ "step": 153850
+ },
+ {
+ "epoch": 0.7940316064822697,
+ "grad_norm": 24731.580078125,
+ "learning_rate": 2.464294904881222e-05,
+ "loss": 0.4028,
+ "step": 153900
+ },
+ {
+ "epoch": 0.7942895764648825,
+ "grad_norm": 21753.568359375,
+ "learning_rate": 2.4611471833657356e-05,
+ "loss": 0.4148,
+ "step": 153950
+ },
+ {
+ "epoch": 0.7945475464474954,
+ "grad_norm": 26548.966796875,
+ "learning_rate": 2.458000817040717e-05,
+ "loss": 0.4074,
+ "step": 154000
+ },
+ {
+ "epoch": 0.7948055164301082,
+ "grad_norm": 21149.470703125,
+ "learning_rate": 2.4548558075856414e-05,
+ "loss": 0.408,
+ "step": 154050
+ },
+ {
+ "epoch": 0.7950634864127211,
+ "grad_norm": 25742.859375,
+ "learning_rate": 2.4517121566792517e-05,
+ "loss": 0.405,
+ "step": 154100
+ },
+ {
+ "epoch": 0.7953214563953338,
+ "grad_norm": 20954.91796875,
+ "learning_rate": 2.4485698659995658e-05,
+ "loss": 0.3975,
+ "step": 154150
+ },
+ {
+ "epoch": 0.7955794263779467,
+ "grad_norm": 23551.646484375,
+ "learning_rate": 2.445428937223884e-05,
+ "loss": 0.4059,
+ "step": 154200
+ },
+ {
+ "epoch": 0.7958373963605595,
+ "grad_norm": 25214.693359375,
+ "learning_rate": 2.4422893720287654e-05,
+ "loss": 0.4008,
+ "step": 154250
+ },
+ {
+ "epoch": 0.7960953663431724,
+ "grad_norm": 25346.916015625,
+ "learning_rate": 2.4391511720900545e-05,
+ "loss": 0.4035,
+ "step": 154300
+ },
+ {
+ "epoch": 0.7963533363257851,
+ "grad_norm": 21641.23828125,
+ "learning_rate": 2.43601433908286e-05,
+ "loss": 0.4069,
+ "step": 154350
+ },
+ {
+ "epoch": 0.7966113063083979,
+ "grad_norm": 22860.998046875,
+ "learning_rate": 2.4328788746815628e-05,
+ "loss": 0.4022,
+ "step": 154400
+ },
+ {
+ "epoch": 0.7968692762910108,
+ "grad_norm": 21989.96484375,
+ "learning_rate": 2.429744780559813e-05,
+ "loss": 0.4055,
+ "step": 154450
+ },
+ {
+ "epoch": 0.7971272462736236,
+ "grad_norm": 24413.74609375,
+ "learning_rate": 2.4266120583905272e-05,
+ "loss": 0.412,
+ "step": 154500
+ },
+ {
+ "epoch": 0.7973852162562364,
+ "grad_norm": 24805.859375,
+ "learning_rate": 2.4234807098458957e-05,
+ "loss": 0.41,
+ "step": 154550
+ },
+ {
+ "epoch": 0.7976431862388492,
+ "grad_norm": 23658.326171875,
+ "learning_rate": 2.42035073659737e-05,
+ "loss": 0.41,
+ "step": 154600
+ },
+ {
+ "epoch": 0.7979011562214621,
+ "grad_norm": 25225.228515625,
+ "learning_rate": 2.417222140315669e-05,
+ "loss": 0.4069,
+ "step": 154650
+ },
+ {
+ "epoch": 0.7981591262040749,
+ "grad_norm": 23417.3828125,
+ "learning_rate": 2.414094922670777e-05,
+ "loss": 0.4102,
+ "step": 154700
+ },
+ {
+ "epoch": 0.7984170961866878,
+ "grad_norm": 25014.5078125,
+ "learning_rate": 2.4109690853319422e-05,
+ "loss": 0.412,
+ "step": 154750
+ },
+ {
+ "epoch": 0.7986750661693005,
+ "grad_norm": 25523.3125,
+ "learning_rate": 2.407844629967674e-05,
+ "loss": 0.4102,
+ "step": 154800
+ },
+ {
+ "epoch": 0.7989330361519134,
+ "grad_norm": 23173.44921875,
+ "learning_rate": 2.404721558245752e-05,
+ "loss": 0.407,
+ "step": 154850
+ },
+ {
+ "epoch": 0.7991910061345262,
+ "grad_norm": 24673.5078125,
+ "learning_rate": 2.401599871833204e-05,
+ "loss": 0.4054,
+ "step": 154900
+ },
+ {
+ "epoch": 0.799448976117139,
+ "grad_norm": 24709.765625,
+ "learning_rate": 2.398479572396331e-05,
+ "loss": 0.4097,
+ "step": 154950
+ },
+ {
+ "epoch": 0.7997069460997518,
+ "grad_norm": 22404.29296875,
+ "learning_rate": 2.395360661600687e-05,
+ "loss": 0.4072,
+ "step": 155000
+ },
+ {
+ "epoch": 0.7997069460997518,
+ "eval_loss": 0.39372530579566956,
+ "eval_runtime": 3195.8879,
+ "eval_samples_per_second": 970.347,
+ "eval_steps_per_second": 1.895,
+ "step": 155000
+ },
+ {
+ "epoch": 0.7999649160823646,
+ "grad_norm": 24004.09375,
+ "learning_rate": 2.3922431411110834e-05,
+ "loss": 0.4016,
+ "step": 155050
+ },
+ {
+ "epoch": 0.8002228860649775,
+ "grad_norm": 25013.6484375,
+ "learning_rate": 2.3891270125915992e-05,
+ "loss": 0.4068,
+ "step": 155100
+ },
+ {
+ "epoch": 0.8004808560475903,
+ "grad_norm": 23532.982421875,
+ "learning_rate": 2.3860122777055553e-05,
+ "loss": 0.4036,
+ "step": 155150
+ },
+ {
+ "epoch": 0.8007388260302031,
+ "grad_norm": 27413.044921875,
+ "learning_rate": 2.3828989381155426e-05,
+ "loss": 0.4098,
+ "step": 155200
+ },
+ {
+ "epoch": 0.8009967960128159,
+ "grad_norm": 25821.794921875,
+ "learning_rate": 2.379786995483399e-05,
+ "loss": 0.4076,
+ "step": 155250
+ },
+ {
+ "epoch": 0.8012547659954288,
+ "grad_norm": 23864.154296875,
+ "learning_rate": 2.37667645147022e-05,
+ "loss": 0.4082,
+ "step": 155300
+ },
+ {
+ "epoch": 0.8015127359780416,
+ "grad_norm": 22892.451171875,
+ "learning_rate": 2.3735673077363534e-05,
+ "loss": 0.4116,
+ "step": 155350
+ },
+ {
+ "epoch": 0.8017707059606545,
+ "grad_norm": 24638.51953125,
+ "learning_rate": 2.3704595659413987e-05,
+ "loss": 0.4015,
+ "step": 155400
+ },
+ {
+ "epoch": 0.8020286759432672,
+ "grad_norm": 23007.734375,
+ "learning_rate": 2.3673532277442112e-05,
+ "loss": 0.4075,
+ "step": 155450
+ },
+ {
+ "epoch": 0.8022866459258801,
+ "grad_norm": 25629.17578125,
+ "learning_rate": 2.364248294802892e-05,
+ "loss": 0.4031,
+ "step": 155500
+ },
+ {
+ "epoch": 0.8025446159084929,
+ "grad_norm": 23949.939453125,
+ "learning_rate": 2.3611447687747955e-05,
+ "loss": 0.4091,
+ "step": 155550
+ },
+ {
+ "epoch": 0.8028025858911058,
+ "grad_norm": 23120.3515625,
+ "learning_rate": 2.3580426513165228e-05,
+ "loss": 0.4106,
+ "step": 155600
+ },
+ {
+ "epoch": 0.8030605558737185,
+ "grad_norm": 26965.955078125,
+ "learning_rate": 2.3549419440839236e-05,
+ "loss": 0.4054,
+ "step": 155650
+ },
+ {
+ "epoch": 0.8033185258563313,
+ "grad_norm": 23370.33984375,
+ "learning_rate": 2.3518426487320948e-05,
+ "loss": 0.407,
+ "step": 155700
+ },
+ {
+ "epoch": 0.8035764958389442,
+ "grad_norm": 22571.12890625,
+ "learning_rate": 2.3487447669153833e-05,
+ "loss": 0.4118,
+ "step": 155750
+ },
+ {
+ "epoch": 0.803834465821557,
+ "grad_norm": 24092.56640625,
+ "learning_rate": 2.3456483002873768e-05,
+ "loss": 0.4053,
+ "step": 155800
+ },
+ {
+ "epoch": 0.8040924358041698,
+ "grad_norm": 24549.140625,
+ "learning_rate": 2.3425532505009072e-05,
+ "loss": 0.405,
+ "step": 155850
+ },
+ {
+ "epoch": 0.8043504057867826,
+ "grad_norm": 23510.904296875,
+ "learning_rate": 2.3394596192080574e-05,
+ "loss": 0.4049,
+ "step": 155900
+ },
+ {
+ "epoch": 0.8046083757693955,
+ "grad_norm": 23147.369140625,
+ "learning_rate": 2.3363674080601416e-05,
+ "loss": 0.4032,
+ "step": 155950
+ },
+ {
+ "epoch": 0.8048663457520083,
+ "grad_norm": 21877.10546875,
+ "learning_rate": 2.3332766187077264e-05,
+ "loss": 0.4006,
+ "step": 156000
+ },
+ {
+ "epoch": 0.8051243157346211,
+ "grad_norm": 24041.384765625,
+ "learning_rate": 2.330187252800614e-05,
+ "loss": 0.4056,
+ "step": 156050
+ },
+ {
+ "epoch": 0.8053822857172339,
+ "grad_norm": 23452.453125,
+ "learning_rate": 2.327099311987848e-05,
+ "loss": 0.4071,
+ "step": 156100
+ },
+ {
+ "epoch": 0.8056402556998468,
+ "grad_norm": 23023.5859375,
+ "learning_rate": 2.3240127979177123e-05,
+ "loss": 0.4095,
+ "step": 156150
+ },
+ {
+ "epoch": 0.8058982256824596,
+ "grad_norm": 23684.615234375,
+ "learning_rate": 2.3209277122377255e-05,
+ "loss": 0.4023,
+ "step": 156200
+ },
+ {
+ "epoch": 0.8061561956650725,
+ "grad_norm": 22598.732421875,
+ "learning_rate": 2.31784405659465e-05,
+ "loss": 0.4013,
+ "step": 156250
+ },
+ {
+ "epoch": 0.8064141656476852,
+ "grad_norm": 21835.93359375,
+ "learning_rate": 2.3147618326344804e-05,
+ "loss": 0.4072,
+ "step": 156300
+ },
+ {
+ "epoch": 0.806672135630298,
+ "grad_norm": 26343.41015625,
+ "learning_rate": 2.311681042002448e-05,
+ "loss": 0.4154,
+ "step": 156350
+ },
+ {
+ "epoch": 0.8069301056129109,
+ "grad_norm": 24116.162109375,
+ "learning_rate": 2.3086016863430193e-05,
+ "loss": 0.4032,
+ "step": 156400
+ },
+ {
+ "epoch": 0.8071880755955237,
+ "grad_norm": 23874.53515625,
+ "learning_rate": 2.3055237672998946e-05,
+ "loss": 0.4063,
+ "step": 156450
+ },
+ {
+ "epoch": 0.8074460455781365,
+ "grad_norm": 25624.203125,
+ "learning_rate": 2.302447286516006e-05,
+ "loss": 0.4034,
+ "step": 156500
+ },
+ {
+ "epoch": 0.8077040155607493,
+ "grad_norm": 22652.2109375,
+ "learning_rate": 2.2993722456335236e-05,
+ "loss": 0.4049,
+ "step": 156550
+ },
+ {
+ "epoch": 0.8079619855433622,
+ "grad_norm": 26234.255859375,
+ "learning_rate": 2.2962986462938385e-05,
+ "loss": 0.4035,
+ "step": 156600
+ },
+ {
+ "epoch": 0.808219955525975,
+ "grad_norm": 24374.974609375,
+ "learning_rate": 2.293226490137584e-05,
+ "loss": 0.4052,
+ "step": 156650
+ },
+ {
+ "epoch": 0.8084779255085878,
+ "grad_norm": 24195.4296875,
+ "learning_rate": 2.2901557788046146e-05,
+ "loss": 0.4072,
+ "step": 156700
+ },
+ {
+ "epoch": 0.8087358954912006,
+ "grad_norm": 24590.525390625,
+ "learning_rate": 2.2870865139340165e-05,
+ "loss": 0.4092,
+ "step": 156750
+ },
+ {
+ "epoch": 0.8089938654738135,
+ "grad_norm": 20863.509765625,
+ "learning_rate": 2.2840186971641083e-05,
+ "loss": 0.4073,
+ "step": 156800
+ },
+ {
+ "epoch": 0.8092518354564263,
+ "grad_norm": 23662.16015625,
+ "learning_rate": 2.2809523301324238e-05,
+ "loss": 0.4101,
+ "step": 156850
+ },
+ {
+ "epoch": 0.8095098054390392,
+ "grad_norm": 21700.666015625,
+ "learning_rate": 2.2778874144757357e-05,
+ "loss": 0.4075,
+ "step": 156900
+ },
+ {
+ "epoch": 0.8097677754216519,
+ "grad_norm": 29026.71484375,
+ "learning_rate": 2.274823951830036e-05,
+ "loss": 0.4005,
+ "step": 156950
+ },
+ {
+ "epoch": 0.8100257454042648,
+ "grad_norm": 27310.48828125,
+ "learning_rate": 2.2717619438305397e-05,
+ "loss": 0.4058,
+ "step": 157000
+ },
+ {
+ "epoch": 0.8102837153868776,
+ "grad_norm": 25008.673828125,
+ "learning_rate": 2.2687013921116895e-05,
+ "loss": 0.404,
+ "step": 157050
+ },
+ {
+ "epoch": 0.8105416853694904,
+ "grad_norm": 22623.57421875,
+ "learning_rate": 2.2656422983071452e-05,
+ "loss": 0.4059,
+ "step": 157100
+ },
+ {
+ "epoch": 0.8107996553521032,
+ "grad_norm": 23960.427734375,
+ "learning_rate": 2.2625846640497965e-05,
+ "loss": 0.4096,
+ "step": 157150
+ },
+ {
+ "epoch": 0.811057625334716,
+ "grad_norm": 22415.021484375,
+ "learning_rate": 2.2595284909717475e-05,
+ "loss": 0.4061,
+ "step": 157200
+ },
+ {
+ "epoch": 0.8113155953173289,
+ "grad_norm": 23358.822265625,
+ "learning_rate": 2.2564737807043233e-05,
+ "loss": 0.4003,
+ "step": 157250
+ },
+ {
+ "epoch": 0.8115735652999417,
+ "grad_norm": 21686.9765625,
+ "learning_rate": 2.2534205348780702e-05,
+ "loss": 0.4063,
+ "step": 157300
+ },
+ {
+ "epoch": 0.8118315352825545,
+ "grad_norm": 22949.484375,
+ "learning_rate": 2.2503687551227504e-05,
+ "loss": 0.407,
+ "step": 157350
+ },
+ {
+ "epoch": 0.8120895052651673,
+ "grad_norm": 21776.201171875,
+ "learning_rate": 2.2473184430673444e-05,
+ "loss": 0.4073,
+ "step": 157400
+ },
+ {
+ "epoch": 0.8123474752477802,
+ "grad_norm": 25641.17578125,
+ "learning_rate": 2.244269600340055e-05,
+ "loss": 0.4074,
+ "step": 157450
+ },
+ {
+ "epoch": 0.812605445230393,
+ "grad_norm": 22723.42578125,
+ "learning_rate": 2.2412222285682867e-05,
+ "loss": 0.4119,
+ "step": 157500
+ },
+ {
+ "epoch": 0.8128634152130059,
+ "grad_norm": 24244.48046875,
+ "learning_rate": 2.2381763293786746e-05,
+ "loss": 0.4157,
+ "step": 157550
+ },
+ {
+ "epoch": 0.8131213851956186,
+ "grad_norm": 26826.337890625,
+ "learning_rate": 2.235131904397058e-05,
+ "loss": 0.4102,
+ "step": 157600
+ },
+ {
+ "epoch": 0.8133793551782315,
+ "grad_norm": 23157.0546875,
+ "learning_rate": 2.232088955248491e-05,
+ "loss": 0.4121,
+ "step": 157650
+ },
+ {
+ "epoch": 0.8136373251608443,
+ "grad_norm": 23352.009765625,
+ "learning_rate": 2.229047483557245e-05,
+ "loss": 0.4054,
+ "step": 157700
+ },
+ {
+ "epoch": 0.8138952951434572,
+ "grad_norm": 24417.2734375,
+ "learning_rate": 2.2260074909467925e-05,
+ "loss": 0.4092,
+ "step": 157750
+ },
+ {
+ "epoch": 0.8141532651260699,
+ "grad_norm": 22345.669921875,
+ "learning_rate": 2.2229689790398283e-05,
+ "loss": 0.402,
+ "step": 157800
+ },
+ {
+ "epoch": 0.8144112351086827,
+ "grad_norm": 22904.20703125,
+ "learning_rate": 2.2199319494582492e-05,
+ "loss": 0.4067,
+ "step": 157850
+ },
+ {
+ "epoch": 0.8146692050912956,
+ "grad_norm": 24132.306640625,
+ "learning_rate": 2.216896403823162e-05,
+ "loss": 0.4094,
+ "step": 157900
+ },
+ {
+ "epoch": 0.8149271750739084,
+ "grad_norm": 24649.001953125,
+ "learning_rate": 2.2138623437548833e-05,
+ "loss": 0.4048,
+ "step": 157950
+ },
+ {
+ "epoch": 0.8151851450565212,
+ "grad_norm": 24956.458984375,
+ "learning_rate": 2.210829770872933e-05,
+ "loss": 0.4038,
+ "step": 158000
+ },
+ {
+ "epoch": 0.815443115039134,
+ "grad_norm": 24047.3515625,
+ "learning_rate": 2.2077986867960437e-05,
+ "loss": 0.407,
+ "step": 158050
+ },
+ {
+ "epoch": 0.8157010850217469,
+ "grad_norm": 22895.953125,
+ "learning_rate": 2.2047690931421476e-05,
+ "loss": 0.4033,
+ "step": 158100
+ },
+ {
+ "epoch": 0.8159590550043597,
+ "grad_norm": 22524.640625,
+ "learning_rate": 2.201740991528383e-05,
+ "loss": 0.4136,
+ "step": 158150
+ },
+ {
+ "epoch": 0.8162170249869725,
+ "grad_norm": 22507.46875,
+ "learning_rate": 2.1987143835710928e-05,
+ "loss": 0.4043,
+ "step": 158200
+ },
+ {
+ "epoch": 0.8164749949695853,
+ "grad_norm": 24044.5390625,
+ "learning_rate": 2.1956892708858202e-05,
+ "loss": 0.4099,
+ "step": 158250
+ },
+ {
+ "epoch": 0.8167329649521982,
+ "grad_norm": 26112.05859375,
+ "learning_rate": 2.1926656550873103e-05,
+ "loss": 0.4087,
+ "step": 158300
+ },
+ {
+ "epoch": 0.816990934934811,
+ "grad_norm": 25168.59375,
+ "learning_rate": 2.189643537789517e-05,
+ "loss": 0.4059,
+ "step": 158350
+ },
+ {
+ "epoch": 0.8172489049174239,
+ "grad_norm": 31289.392578125,
+ "learning_rate": 2.1866229206055804e-05,
+ "loss": 0.4048,
+ "step": 158400
+ },
+ {
+ "epoch": 0.8175068749000366,
+ "grad_norm": 27301.970703125,
+ "learning_rate": 2.1836038051478508e-05,
+ "loss": 0.4111,
+ "step": 158450
+ },
+ {
+ "epoch": 0.8177648448826494,
+ "grad_norm": 22742.66015625,
+ "learning_rate": 2.180586193027877e-05,
+ "loss": 0.3998,
+ "step": 158500
+ },
+ {
+ "epoch": 0.8180228148652623,
+ "grad_norm": 26745.51171875,
+ "learning_rate": 2.177570085856395e-05,
+ "loss": 0.4069,
+ "step": 158550
+ },
+ {
+ "epoch": 0.8182807848478751,
+ "grad_norm": 24821.93359375,
+ "learning_rate": 2.1745554852433502e-05,
+ "loss": 0.4057,
+ "step": 158600
+ },
+ {
+ "epoch": 0.8185387548304879,
+ "grad_norm": 24082.908203125,
+ "learning_rate": 2.1715423927978755e-05,
+ "loss": 0.4042,
+ "step": 158650
+ },
+ {
+ "epoch": 0.8187967248131007,
+ "grad_norm": 23584.001953125,
+ "learning_rate": 2.168530810128302e-05,
+ "loss": 0.4062,
+ "step": 158700
+ },
+ {
+ "epoch": 0.8190546947957136,
+ "grad_norm": 25795.326171875,
+ "learning_rate": 2.1655207388421532e-05,
+ "loss": 0.4101,
+ "step": 158750
+ },
+ {
+ "epoch": 0.8193126647783264,
+ "grad_norm": 22298.908203125,
+ "learning_rate": 2.1625121805461483e-05,
+ "loss": 0.4004,
+ "step": 158800
+ },
+ {
+ "epoch": 0.8195706347609392,
+ "grad_norm": 24439.970703125,
+ "learning_rate": 2.1595051368461943e-05,
+ "loss": 0.4078,
+ "step": 158850
+ },
+ {
+ "epoch": 0.819828604743552,
+ "grad_norm": 24895.5546875,
+ "learning_rate": 2.1564996093473975e-05,
+ "loss": 0.4008,
+ "step": 158900
+ },
+ {
+ "epoch": 0.8200865747261649,
+ "grad_norm": 27615.1171875,
+ "learning_rate": 2.153495599654048e-05,
+ "loss": 0.4051,
+ "step": 158950
+ },
+ {
+ "epoch": 0.8203445447087777,
+ "grad_norm": 22537.25390625,
+ "learning_rate": 2.150493109369628e-05,
+ "loss": 0.4078,
+ "step": 159000
+ },
+ {
+ "epoch": 0.8206025146913906,
+ "grad_norm": 23422.39453125,
+ "learning_rate": 2.1474921400968085e-05,
+ "loss": 0.3999,
+ "step": 159050
+ },
+ {
+ "epoch": 0.8208604846740033,
+ "grad_norm": 24678.099609375,
+ "learning_rate": 2.1444926934374475e-05,
+ "loss": 0.4038,
+ "step": 159100
+ },
+ {
+ "epoch": 0.8211184546566161,
+ "grad_norm": 25680.623046875,
+ "learning_rate": 2.1414947709925963e-05,
+ "loss": 0.4082,
+ "step": 159150
+ },
+ {
+ "epoch": 0.821376424639229,
+ "grad_norm": 26526.724609375,
+ "learning_rate": 2.1384983743624813e-05,
+ "loss": 0.4076,
+ "step": 159200
+ },
+ {
+ "epoch": 0.8216343946218418,
+ "grad_norm": 21391.701171875,
+ "learning_rate": 2.1355035051465265e-05,
+ "loss": 0.4003,
+ "step": 159250
+ },
+ {
+ "epoch": 0.8218923646044546,
+ "grad_norm": 22676.607421875,
+ "learning_rate": 2.1325101649433327e-05,
+ "loss": 0.4087,
+ "step": 159300
+ },
+ {
+ "epoch": 0.8221503345870674,
+ "grad_norm": 23139.802734375,
+ "learning_rate": 2.1295183553506855e-05,
+ "loss": 0.4102,
+ "step": 159350
+ },
+ {
+ "epoch": 0.8224083045696803,
+ "grad_norm": 23598.369140625,
+ "learning_rate": 2.1265280779655593e-05,
+ "loss": 0.4027,
+ "step": 159400
+ },
+ {
+ "epoch": 0.8226662745522931,
+ "grad_norm": 24068.453125,
+ "learning_rate": 2.1235393343841008e-05,
+ "loss": 0.4097,
+ "step": 159450
+ },
+ {
+ "epoch": 0.8229242445349059,
+ "grad_norm": 26833.779296875,
+ "learning_rate": 2.1205521262016476e-05,
+ "loss": 0.4094,
+ "step": 159500
+ },
+ {
+ "epoch": 0.8231822145175187,
+ "grad_norm": 21122.98046875,
+ "learning_rate": 2.1175664550127123e-05,
+ "loss": 0.4074,
+ "step": 159550
+ },
+ {
+ "epoch": 0.8234401845001316,
+ "grad_norm": 24398.310546875,
+ "learning_rate": 2.1145823224109884e-05,
+ "loss": 0.4081,
+ "step": 159600
+ },
+ {
+ "epoch": 0.8236981544827444,
+ "grad_norm": 20830.05078125,
+ "learning_rate": 2.111599729989348e-05,
+ "loss": 0.4031,
+ "step": 159650
+ },
+ {
+ "epoch": 0.8239561244653573,
+ "grad_norm": 24353.29296875,
+ "learning_rate": 2.108618679339841e-05,
+ "loss": 0.4037,
+ "step": 159700
+ },
+ {
+ "epoch": 0.82421409444797,
+ "grad_norm": 22828.130859375,
+ "learning_rate": 2.1056391720536928e-05,
+ "loss": 0.4021,
+ "step": 159750
+ },
+ {
+ "epoch": 0.8244720644305829,
+ "grad_norm": 21661.53515625,
+ "learning_rate": 2.1026612097213106e-05,
+ "loss": 0.4117,
+ "step": 159800
+ },
+ {
+ "epoch": 0.8247300344131957,
+ "grad_norm": 20191.279296875,
+ "learning_rate": 2.0996847939322707e-05,
+ "loss": 0.4088,
+ "step": 159850
+ },
+ {
+ "epoch": 0.8249880043958085,
+ "grad_norm": 23767.8125,
+ "learning_rate": 2.0967099262753258e-05,
+ "loss": 0.4035,
+ "step": 159900
+ },
+ {
+ "epoch": 0.8252459743784213,
+ "grad_norm": 24693.4609375,
+ "learning_rate": 2.093736608338405e-05,
+ "loss": 0.4135,
+ "step": 159950
+ },
+ {
+ "epoch": 0.8255039443610341,
+ "grad_norm": 22759.341796875,
+ "learning_rate": 2.0907648417086027e-05,
+ "loss": 0.4048,
+ "step": 160000
+ },
+ {
+ "epoch": 0.8255039443610341,
+ "eval_loss": 0.3925068974494934,
+ "eval_runtime": 3187.046,
+ "eval_samples_per_second": 973.039,
+ "eval_steps_per_second": 1.901,
+ "step": 160000
+ },
+ {
+ "epoch": 0.825761914343647,
+ "grad_norm": 25066.45703125,
+ "learning_rate": 2.0877946279721983e-05,
+ "loss": 0.4017,
+ "step": 160050
+ },
+ {
+ "epoch": 0.8260198843262598,
+ "grad_norm": 24734.384765625,
+ "learning_rate": 2.084825968714626e-05,
+ "loss": 0.4091,
+ "step": 160100
+ },
+ {
+ "epoch": 0.8262778543088726,
+ "grad_norm": 26498.201171875,
+ "learning_rate": 2.0818588655205045e-05,
+ "loss": 0.4028,
+ "step": 160150
+ },
+ {
+ "epoch": 0.8265358242914854,
+ "grad_norm": 23436.36328125,
+ "learning_rate": 2.0788933199736143e-05,
+ "loss": 0.4019,
+ "step": 160200
+ },
+ {
+ "epoch": 0.8267937942740983,
+ "grad_norm": 23851.89453125,
+ "learning_rate": 2.075929333656904e-05,
+ "loss": 0.4055,
+ "step": 160250
+ },
+ {
+ "epoch": 0.8270517642567111,
+ "grad_norm": 23416.0625,
+ "learning_rate": 2.0729669081524977e-05,
+ "loss": 0.4075,
+ "step": 160300
+ },
+ {
+ "epoch": 0.8273097342393239,
+ "grad_norm": 22208.994140625,
+ "learning_rate": 2.070006045041673e-05,
+ "loss": 0.4047,
+ "step": 160350
+ },
+ {
+ "epoch": 0.8275677042219367,
+ "grad_norm": 21291.3515625,
+ "learning_rate": 2.067046745904888e-05,
+ "loss": 0.405,
+ "step": 160400
+ },
+ {
+ "epoch": 0.8278256742045496,
+ "grad_norm": 24646.279296875,
+ "learning_rate": 2.0640890123217565e-05,
+ "loss": 0.4076,
+ "step": 160450
+ },
+ {
+ "epoch": 0.8280836441871624,
+ "grad_norm": 22018.609375,
+ "learning_rate": 2.0611328458710595e-05,
+ "loss": 0.406,
+ "step": 160500
+ },
+ {
+ "epoch": 0.8283416141697753,
+ "grad_norm": 30070.40234375,
+ "learning_rate": 2.0581782481307415e-05,
+ "loss": 0.4099,
+ "step": 160550
+ },
+ {
+ "epoch": 0.828599584152388,
+ "grad_norm": 24574.34375,
+ "learning_rate": 2.0552252206779098e-05,
+ "loss": 0.4035,
+ "step": 160600
+ },
+ {
+ "epoch": 0.8288575541350008,
+ "grad_norm": 23137.224609375,
+ "learning_rate": 2.0522737650888313e-05,
+ "loss": 0.4006,
+ "step": 160650
+ },
+ {
+ "epoch": 0.8291155241176137,
+ "grad_norm": 22633.23828125,
+ "learning_rate": 2.0493238829389393e-05,
+ "loss": 0.4064,
+ "step": 160700
+ },
+ {
+ "epoch": 0.8293734941002265,
+ "grad_norm": 23670.525390625,
+ "learning_rate": 2.046375575802822e-05,
+ "loss": 0.4084,
+ "step": 160750
+ },
+ {
+ "epoch": 0.8296314640828393,
+ "grad_norm": 24236.7890625,
+ "learning_rate": 2.043428845254229e-05,
+ "loss": 0.413,
+ "step": 160800
+ },
+ {
+ "epoch": 0.8298894340654521,
+ "grad_norm": 25734.12890625,
+ "learning_rate": 2.0404836928660676e-05,
+ "loss": 0.3992,
+ "step": 160850
+ },
+ {
+ "epoch": 0.830147404048065,
+ "grad_norm": 23417.83203125,
+ "learning_rate": 2.037540120210401e-05,
+ "loss": 0.4069,
+ "step": 160900
+ },
+ {
+ "epoch": 0.8304053740306778,
+ "grad_norm": 24619.853515625,
+ "learning_rate": 2.0345981288584575e-05,
+ "loss": 0.4002,
+ "step": 160950
+ },
+ {
+ "epoch": 0.8306633440132906,
+ "grad_norm": 21862.111328125,
+ "learning_rate": 2.031657720380608e-05,
+ "loss": 0.4012,
+ "step": 161000
+ },
+ {
+ "epoch": 0.8309213139959034,
+ "grad_norm": 23347.91015625,
+ "learning_rate": 2.0287188963463906e-05,
+ "loss": 0.4061,
+ "step": 161050
+ },
+ {
+ "epoch": 0.8311792839785163,
+ "grad_norm": 25119.107421875,
+ "learning_rate": 2.02578165832449e-05,
+ "loss": 0.4061,
+ "step": 161100
+ },
+ {
+ "epoch": 0.8314372539611291,
+ "grad_norm": 22684.50390625,
+ "learning_rate": 2.0228460078827466e-05,
+ "loss": 0.4062,
+ "step": 161150
+ },
+ {
+ "epoch": 0.831695223943742,
+ "grad_norm": 39309.30859375,
+ "learning_rate": 2.0199119465881565e-05,
+ "loss": 0.4091,
+ "step": 161200
+ },
+ {
+ "epoch": 0.8319531939263547,
+ "grad_norm": 22076.8125,
+ "learning_rate": 2.0169794760068632e-05,
+ "loss": 0.4052,
+ "step": 161250
+ },
+ {
+ "epoch": 0.8322111639089675,
+ "grad_norm": 26682.44140625,
+ "learning_rate": 2.0140485977041636e-05,
+ "loss": 0.405,
+ "step": 161300
+ },
+ {
+ "epoch": 0.8324691338915804,
+ "grad_norm": 24586.09375,
+ "learning_rate": 2.011119313244502e-05,
+ "loss": 0.4066,
+ "step": 161350
+ },
+ {
+ "epoch": 0.8327271038741932,
+ "grad_norm": 26363.5703125,
+ "learning_rate": 2.008191624191475e-05,
+ "loss": 0.4027,
+ "step": 161400
+ },
+ {
+ "epoch": 0.832985073856806,
+ "grad_norm": 24361.9921875,
+ "learning_rate": 2.0052655321078246e-05,
+ "loss": 0.4041,
+ "step": 161450
+ },
+ {
+ "epoch": 0.8332430438394188,
+ "grad_norm": 22026.951171875,
+ "learning_rate": 2.0023410385554466e-05,
+ "loss": 0.4068,
+ "step": 161500
+ },
+ {
+ "epoch": 0.8335010138220317,
+ "grad_norm": 24540.068359375,
+ "learning_rate": 1.9994181450953725e-05,
+ "loss": 0.4036,
+ "step": 161550
+ },
+ {
+ "epoch": 0.8337589838046445,
+ "grad_norm": 25837.857421875,
+ "learning_rate": 1.9964968532877916e-05,
+ "loss": 0.4052,
+ "step": 161600
+ },
+ {
+ "epoch": 0.8340169537872573,
+ "grad_norm": 23252.900390625,
+ "learning_rate": 1.993577164692031e-05,
+ "loss": 0.4021,
+ "step": 161650
+ },
+ {
+ "epoch": 0.8342749237698701,
+ "grad_norm": 25305.177734375,
+ "learning_rate": 1.990659080866562e-05,
+ "loss": 0.4089,
+ "step": 161700
+ },
+ {
+ "epoch": 0.834532893752483,
+ "grad_norm": 25317.89453125,
+ "learning_rate": 1.9877426033690066e-05,
+ "loss": 0.4082,
+ "step": 161750
+ },
+ {
+ "epoch": 0.8347908637350958,
+ "grad_norm": 25872.2109375,
+ "learning_rate": 1.984827733756117e-05,
+ "loss": 0.4021,
+ "step": 161800
+ },
+ {
+ "epoch": 0.8350488337177087,
+ "grad_norm": 23915.955078125,
+ "learning_rate": 1.9819144735837998e-05,
+ "loss": 0.4054,
+ "step": 161850
+ },
+ {
+ "epoch": 0.8353068037003214,
+ "grad_norm": 25145.380859375,
+ "learning_rate": 1.9790028244070946e-05,
+ "loss": 0.4119,
+ "step": 161900
+ },
+ {
+ "epoch": 0.8355647736829342,
+ "grad_norm": 24318.28125,
+ "learning_rate": 1.976092787780184e-05,
+ "loss": 0.4015,
+ "step": 161950
+ },
+ {
+ "epoch": 0.8358227436655471,
+ "grad_norm": 22675.845703125,
+ "learning_rate": 1.973184365256388e-05,
+ "loss": 0.4107,
+ "step": 162000
+ },
+ {
+ "epoch": 0.83608071364816,
+ "grad_norm": 23785.451171875,
+ "learning_rate": 1.9702775583881656e-05,
+ "loss": 0.408,
+ "step": 162050
+ },
+ {
+ "epoch": 0.8363386836307727,
+ "grad_norm": 22790.47265625,
+ "learning_rate": 1.9673723687271174e-05,
+ "loss": 0.406,
+ "step": 162100
+ },
+ {
+ "epoch": 0.8365966536133855,
+ "grad_norm": 24380.498046875,
+ "learning_rate": 1.9644687978239746e-05,
+ "loss": 0.4105,
+ "step": 162150
+ },
+ {
+ "epoch": 0.8368546235959984,
+ "grad_norm": 23812.814453125,
+ "learning_rate": 1.9615668472286085e-05,
+ "loss": 0.4032,
+ "step": 162200
+ },
+ {
+ "epoch": 0.8371125935786112,
+ "grad_norm": 22820.734375,
+ "learning_rate": 1.9586665184900232e-05,
+ "loss": 0.4072,
+ "step": 162250
+ },
+ {
+ "epoch": 0.837370563561224,
+ "grad_norm": 22347.779296875,
+ "learning_rate": 1.955767813156359e-05,
+ "loss": 0.4045,
+ "step": 162300
+ },
+ {
+ "epoch": 0.8376285335438368,
+ "grad_norm": 24328.546875,
+ "learning_rate": 1.9528707327748852e-05,
+ "loss": 0.4097,
+ "step": 162350
+ },
+ {
+ "epoch": 0.8378865035264497,
+ "grad_norm": 23850.13671875,
+ "learning_rate": 1.9499752788920146e-05,
+ "loss": 0.4085,
+ "step": 162400
+ },
+ {
+ "epoch": 0.8381444735090625,
+ "grad_norm": 24967.3203125,
+ "learning_rate": 1.9470814530532756e-05,
+ "loss": 0.4056,
+ "step": 162450
+ },
+ {
+ "epoch": 0.8384024434916753,
+ "grad_norm": 23740.197265625,
+ "learning_rate": 1.9441892568033426e-05,
+ "loss": 0.4112,
+ "step": 162500
+ },
+ {
+ "epoch": 0.8386604134742881,
+ "grad_norm": 26039.447265625,
+ "learning_rate": 1.941298691686012e-05,
+ "loss": 0.405,
+ "step": 162550
+ },
+ {
+ "epoch": 0.838918383456901,
+ "grad_norm": 22781.23828125,
+ "learning_rate": 1.9384097592442102e-05,
+ "loss": 0.4043,
+ "step": 162600
+ },
+ {
+ "epoch": 0.8391763534395138,
+ "grad_norm": 25735.17578125,
+ "learning_rate": 1.935522461019998e-05,
+ "loss": 0.4021,
+ "step": 162650
+ },
+ {
+ "epoch": 0.8394343234221266,
+ "grad_norm": 26452.810546875,
+ "learning_rate": 1.932636798554552e-05,
+ "loss": 0.4093,
+ "step": 162700
+ },
+ {
+ "epoch": 0.8396922934047394,
+ "grad_norm": 24199.3515625,
+ "learning_rate": 1.929752773388189e-05,
+ "loss": 0.4003,
+ "step": 162750
+ },
+ {
+ "epoch": 0.8399502633873522,
+ "grad_norm": 27610.30859375,
+ "learning_rate": 1.9268703870603434e-05,
+ "loss": 0.4035,
+ "step": 162800
+ },
+ {
+ "epoch": 0.8402082333699651,
+ "grad_norm": 23799.3359375,
+ "learning_rate": 1.9239896411095777e-05,
+ "loss": 0.4072,
+ "step": 162850
+ },
+ {
+ "epoch": 0.8404662033525779,
+ "grad_norm": 24182.162109375,
+ "learning_rate": 1.9211105370735784e-05,
+ "loss": 0.4056,
+ "step": 162900
+ },
+ {
+ "epoch": 0.8407241733351907,
+ "grad_norm": 21251.0625,
+ "learning_rate": 1.918233076489153e-05,
+ "loss": 0.4073,
+ "step": 162950
+ },
+ {
+ "epoch": 0.8409821433178035,
+ "grad_norm": 22723.09765625,
+ "learning_rate": 1.9153572608922383e-05,
+ "loss": 0.4041,
+ "step": 163000
+ },
+ {
+ "epoch": 0.8412401133004164,
+ "grad_norm": 23557.125,
+ "learning_rate": 1.9124830918178876e-05,
+ "loss": 0.4064,
+ "step": 163050
+ },
+ {
+ "epoch": 0.8414980832830292,
+ "grad_norm": 24273.71484375,
+ "learning_rate": 1.9096105708002754e-05,
+ "loss": 0.4072,
+ "step": 163100
+ },
+ {
+ "epoch": 0.841756053265642,
+ "grad_norm": 24078.10546875,
+ "learning_rate": 1.9067396993726994e-05,
+ "loss": 0.409,
+ "step": 163150
+ },
+ {
+ "epoch": 0.8420140232482548,
+ "grad_norm": 23370.31640625,
+ "learning_rate": 1.9038704790675738e-05,
+ "loss": 0.4082,
+ "step": 163200
+ },
+ {
+ "epoch": 0.8422719932308677,
+ "grad_norm": 23478.564453125,
+ "learning_rate": 1.901002911416432e-05,
+ "loss": 0.4082,
+ "step": 163250
+ },
+ {
+ "epoch": 0.8425299632134805,
+ "grad_norm": 22697.802734375,
+ "learning_rate": 1.898136997949929e-05,
+ "loss": 0.4107,
+ "step": 163300
+ },
+ {
+ "epoch": 0.8427879331960934,
+ "grad_norm": 25571.9765625,
+ "learning_rate": 1.8952727401978326e-05,
+ "loss": 0.3996,
+ "step": 163350
+ },
+ {
+ "epoch": 0.8430459031787061,
+ "grad_norm": 24950.283203125,
+ "learning_rate": 1.8924101396890264e-05,
+ "loss": 0.403,
+ "step": 163400
+ },
+ {
+ "epoch": 0.8433038731613189,
+ "grad_norm": 22436.380859375,
+ "learning_rate": 1.8895491979515162e-05,
+ "loss": 0.4041,
+ "step": 163450
+ },
+ {
+ "epoch": 0.8435618431439318,
+ "grad_norm": 25954.529296875,
+ "learning_rate": 1.8866899165124097e-05,
+ "loss": 0.4003,
+ "step": 163500
+ },
+ {
+ "epoch": 0.8438198131265446,
+ "grad_norm": 21477.8828125,
+ "learning_rate": 1.883832296897944e-05,
+ "loss": 0.4063,
+ "step": 163550
+ },
+ {
+ "epoch": 0.8440777831091574,
+ "grad_norm": 24669.7890625,
+ "learning_rate": 1.8809763406334535e-05,
+ "loss": 0.4049,
+ "step": 163600
+ },
+ {
+ "epoch": 0.8443357530917702,
+ "grad_norm": 27181.50390625,
+ "learning_rate": 1.878122049243398e-05,
+ "loss": 0.4007,
+ "step": 163650
+ },
+ {
+ "epoch": 0.8445937230743831,
+ "grad_norm": 25191.591796875,
+ "learning_rate": 1.8752694242513408e-05,
+ "loss": 0.4072,
+ "step": 163700
+ },
+ {
+ "epoch": 0.8448516930569959,
+ "grad_norm": 24557.42578125,
+ "learning_rate": 1.872418467179956e-05,
+ "loss": 0.4043,
+ "step": 163750
+ },
+ {
+ "epoch": 0.8451096630396087,
+ "grad_norm": 25135.6328125,
+ "learning_rate": 1.8695691795510335e-05,
+ "loss": 0.4008,
+ "step": 163800
+ },
+ {
+ "epoch": 0.8453676330222215,
+ "grad_norm": 23372.181640625,
+ "learning_rate": 1.8667215628854656e-05,
+ "loss": 0.4073,
+ "step": 163850
+ },
+ {
+ "epoch": 0.8456256030048344,
+ "grad_norm": 23332.65625,
+ "learning_rate": 1.8638756187032554e-05,
+ "loss": 0.3987,
+ "step": 163900
+ },
+ {
+ "epoch": 0.8458835729874472,
+ "grad_norm": 23423.669921875,
+ "learning_rate": 1.861031348523512e-05,
+ "loss": 0.4066,
+ "step": 163950
+ },
+ {
+ "epoch": 0.8461415429700601,
+ "grad_norm": 25873.208984375,
+ "learning_rate": 1.858188753864452e-05,
+ "loss": 0.4015,
+ "step": 164000
+ },
+ {
+ "epoch": 0.8463995129526728,
+ "grad_norm": 24766.4140625,
+ "learning_rate": 1.8553478362433964e-05,
+ "loss": 0.4076,
+ "step": 164050
+ },
+ {
+ "epoch": 0.8466574829352856,
+ "grad_norm": 25044.45703125,
+ "learning_rate": 1.852508597176776e-05,
+ "loss": 0.3972,
+ "step": 164100
+ },
+ {
+ "epoch": 0.8469154529178985,
+ "grad_norm": 23699.478515625,
+ "learning_rate": 1.8496710381801157e-05,
+ "loss": 0.3953,
+ "step": 164150
+ },
+ {
+ "epoch": 0.8471734229005113,
+ "grad_norm": 22853.53125,
+ "learning_rate": 1.8468351607680546e-05,
+ "loss": 0.4095,
+ "step": 164200
+ },
+ {
+ "epoch": 0.8474313928831241,
+ "grad_norm": 21374.96875,
+ "learning_rate": 1.8440009664543267e-05,
+ "loss": 0.4092,
+ "step": 164250
+ },
+ {
+ "epoch": 0.8476893628657369,
+ "grad_norm": 22454.515625,
+ "learning_rate": 1.8411684567517694e-05,
+ "loss": 0.4005,
+ "step": 164300
+ },
+ {
+ "epoch": 0.8479473328483498,
+ "grad_norm": 23134.24609375,
+ "learning_rate": 1.8383376331723258e-05,
+ "loss": 0.4041,
+ "step": 164350
+ },
+ {
+ "epoch": 0.8482053028309626,
+ "grad_norm": 23000.69921875,
+ "learning_rate": 1.835508497227028e-05,
+ "loss": 0.4056,
+ "step": 164400
+ },
+ {
+ "epoch": 0.8484632728135754,
+ "grad_norm": 23213.333984375,
+ "learning_rate": 1.8326810504260194e-05,
+ "loss": 0.4076,
+ "step": 164450
+ },
+ {
+ "epoch": 0.8487212427961882,
+ "grad_norm": 24883.953125,
+ "learning_rate": 1.8298552942785353e-05,
+ "loss": 0.4023,
+ "step": 164500
+ },
+ {
+ "epoch": 0.8489792127788011,
+ "grad_norm": 23075.015625,
+ "learning_rate": 1.827031230292908e-05,
+ "loss": 0.4095,
+ "step": 164550
+ },
+ {
+ "epoch": 0.8492371827614139,
+ "grad_norm": 24055.23828125,
+ "learning_rate": 1.824208859976569e-05,
+ "loss": 0.4034,
+ "step": 164600
+ },
+ {
+ "epoch": 0.8494951527440268,
+ "grad_norm": 24572.919921875,
+ "learning_rate": 1.8213881848360438e-05,
+ "loss": 0.4106,
+ "step": 164650
+ },
+ {
+ "epoch": 0.8497531227266395,
+ "grad_norm": 26111.40234375,
+ "learning_rate": 1.8185692063769566e-05,
+ "loss": 0.4051,
+ "step": 164700
+ },
+ {
+ "epoch": 0.8500110927092523,
+ "grad_norm": 22763.25,
+ "learning_rate": 1.8157519261040222e-05,
+ "loss": 0.4019,
+ "step": 164750
+ },
+ {
+ "epoch": 0.8502690626918652,
+ "grad_norm": 22230.16796875,
+ "learning_rate": 1.8129363455210503e-05,
+ "loss": 0.4085,
+ "step": 164800
+ },
+ {
+ "epoch": 0.850527032674478,
+ "grad_norm": 24729.40234375,
+ "learning_rate": 1.8101224661309435e-05,
+ "loss": 0.4042,
+ "step": 164850
+ },
+ {
+ "epoch": 0.8507850026570908,
+ "grad_norm": 23329.431640625,
+ "learning_rate": 1.807310289435696e-05,
+ "loss": 0.405,
+ "step": 164900
+ },
+ {
+ "epoch": 0.8510429726397036,
+ "grad_norm": 24267.970703125,
+ "learning_rate": 1.8044998169363908e-05,
+ "loss": 0.406,
+ "step": 164950
+ },
+ {
+ "epoch": 0.8513009426223165,
+ "grad_norm": 23587.689453125,
+ "learning_rate": 1.80169105013321e-05,
+ "loss": 0.4069,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8513009426223165,
+ "eval_loss": 0.3912332057952881,
+ "eval_runtime": 3189.1337,
+ "eval_samples_per_second": 972.402,
+ "eval_steps_per_second": 1.899,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8515589126049293,
+ "grad_norm": 23356.634765625,
+ "learning_rate": 1.798883990525412e-05,
+ "loss": 0.4022,
+ "step": 165050
+ },
+ {
+ "epoch": 0.8518168825875421,
+ "grad_norm": 23850.75,
+ "learning_rate": 1.7960786396113542e-05,
+ "loss": 0.3984,
+ "step": 165100
+ },
+ {
+ "epoch": 0.8520748525701549,
+ "grad_norm": 23898.03125,
+ "learning_rate": 1.7932749988884795e-05,
+ "loss": 0.4035,
+ "step": 165150
+ },
+ {
+ "epoch": 0.8523328225527678,
+ "grad_norm": 23517.4453125,
+ "learning_rate": 1.790473069853314e-05,
+ "loss": 0.4061,
+ "step": 165200
+ },
+ {
+ "epoch": 0.8525907925353806,
+ "grad_norm": 24264.568359375,
+ "learning_rate": 1.787672854001478e-05,
+ "loss": 0.4076,
+ "step": 165250
+ },
+ {
+ "epoch": 0.8528487625179934,
+ "grad_norm": 23741.220703125,
+ "learning_rate": 1.7848743528276663e-05,
+ "loss": 0.4063,
+ "step": 165300
+ },
+ {
+ "epoch": 0.8531067325006062,
+ "grad_norm": 25368.697265625,
+ "learning_rate": 1.782077567825669e-05,
+ "loss": 0.4027,
+ "step": 165350
+ },
+ {
+ "epoch": 0.853364702483219,
+ "grad_norm": 21610.12890625,
+ "learning_rate": 1.779282500488355e-05,
+ "loss": 0.4067,
+ "step": 165400
+ },
+ {
+ "epoch": 0.8536226724658319,
+ "grad_norm": 26066.560546875,
+ "learning_rate": 1.7764891523076766e-05,
+ "loss": 0.4091,
+ "step": 165450
+ },
+ {
+ "epoch": 0.8538806424484447,
+ "grad_norm": 22909.5234375,
+ "learning_rate": 1.773697524774669e-05,
+ "loss": 0.4035,
+ "step": 165500
+ },
+ {
+ "epoch": 0.8541386124310575,
+ "grad_norm": 23672.54296875,
+ "learning_rate": 1.7709076193794478e-05,
+ "loss": 0.407,
+ "step": 165550
+ },
+ {
+ "epoch": 0.8543965824136703,
+ "grad_norm": 22466.203125,
+ "learning_rate": 1.7681194376112125e-05,
+ "loss": 0.4057,
+ "step": 165600
+ },
+ {
+ "epoch": 0.8546545523962832,
+ "grad_norm": 23236.4296875,
+ "learning_rate": 1.7653329809582404e-05,
+ "loss": 0.4058,
+ "step": 165650
+ },
+ {
+ "epoch": 0.854912522378896,
+ "grad_norm": 23181.5,
+ "learning_rate": 1.7625482509078873e-05,
+ "loss": 0.4007,
+ "step": 165700
+ },
+ {
+ "epoch": 0.8551704923615088,
+ "grad_norm": 20621.5,
+ "learning_rate": 1.7597652489465877e-05,
+ "loss": 0.4053,
+ "step": 165750
+ },
+ {
+ "epoch": 0.8554284623441216,
+ "grad_norm": 23911.7734375,
+ "learning_rate": 1.756983976559855e-05,
+ "loss": 0.4043,
+ "step": 165800
+ },
+ {
+ "epoch": 0.8556864323267345,
+ "grad_norm": 21440.978515625,
+ "learning_rate": 1.7542044352322768e-05,
+ "loss": 0.4076,
+ "step": 165850
+ },
+ {
+ "epoch": 0.8559444023093473,
+ "grad_norm": 22439.712890625,
+ "learning_rate": 1.7514266264475233e-05,
+ "loss": 0.3999,
+ "step": 165900
+ },
+ {
+ "epoch": 0.8562023722919601,
+ "grad_norm": 24814.876953125,
+ "learning_rate": 1.748650551688328e-05,
+ "loss": 0.405,
+ "step": 165950
+ },
+ {
+ "epoch": 0.8564603422745729,
+ "grad_norm": 21705.185546875,
+ "learning_rate": 1.7458762124365096e-05,
+ "loss": 0.4007,
+ "step": 166000
+ },
+ {
+ "epoch": 0.8567183122571858,
+ "grad_norm": 25317.05078125,
+ "learning_rate": 1.7431036101729604e-05,
+ "loss": 0.4036,
+ "step": 166050
+ },
+ {
+ "epoch": 0.8569762822397986,
+ "grad_norm": 23984.142578125,
+ "learning_rate": 1.7403327463776343e-05,
+ "loss": 0.4027,
+ "step": 166100
+ },
+ {
+ "epoch": 0.8572342522224115,
+ "grad_norm": 24149.794921875,
+ "learning_rate": 1.7375636225295716e-05,
+ "loss": 0.3986,
+ "step": 166150
+ },
+ {
+ "epoch": 0.8574922222050242,
+ "grad_norm": 20085.748046875,
+ "learning_rate": 1.73479624010687e-05,
+ "loss": 0.4032,
+ "step": 166200
+ },
+ {
+ "epoch": 0.857750192187637,
+ "grad_norm": 25550.01171875,
+ "learning_rate": 1.732030600586711e-05,
+ "loss": 0.4067,
+ "step": 166250
+ },
+ {
+ "epoch": 0.8580081621702499,
+ "grad_norm": 23439.69921875,
+ "learning_rate": 1.7292667054453364e-05,
+ "loss": 0.4058,
+ "step": 166300
+ },
+ {
+ "epoch": 0.8582661321528627,
+ "grad_norm": 24064.46484375,
+ "learning_rate": 1.7265045561580606e-05,
+ "loss": 0.406,
+ "step": 166350
+ },
+ {
+ "epoch": 0.8585241021354755,
+ "grad_norm": 27679.162109375,
+ "learning_rate": 1.723744154199264e-05,
+ "loss": 0.403,
+ "step": 166400
+ },
+ {
+ "epoch": 0.8587820721180883,
+ "grad_norm": 21371.59765625,
+ "learning_rate": 1.7209855010423977e-05,
+ "loss": 0.4103,
+ "step": 166450
+ },
+ {
+ "epoch": 0.8590400421007012,
+ "grad_norm": 24340.283203125,
+ "learning_rate": 1.7182285981599766e-05,
+ "loss": 0.4073,
+ "step": 166500
+ },
+ {
+ "epoch": 0.859298012083314,
+ "grad_norm": 22603.62109375,
+ "learning_rate": 1.7154734470235823e-05,
+ "loss": 0.4026,
+ "step": 166550
+ },
+ {
+ "epoch": 0.8595559820659268,
+ "grad_norm": 21442.248046875,
+ "learning_rate": 1.7127200491038607e-05,
+ "loss": 0.4089,
+ "step": 166600
+ },
+ {
+ "epoch": 0.8598139520485396,
+ "grad_norm": 22127.478515625,
+ "learning_rate": 1.7099684058705212e-05,
+ "loss": 0.4073,
+ "step": 166650
+ },
+ {
+ "epoch": 0.8600719220311525,
+ "grad_norm": 37660.0859375,
+ "learning_rate": 1.707218518792342e-05,
+ "loss": 0.404,
+ "step": 166700
+ },
+ {
+ "epoch": 0.8603298920137653,
+ "grad_norm": 23772.982421875,
+ "learning_rate": 1.704470389337153e-05,
+ "loss": 0.4004,
+ "step": 166750
+ },
+ {
+ "epoch": 0.8605878619963782,
+ "grad_norm": 24957.23828125,
+ "learning_rate": 1.7017240189718575e-05,
+ "loss": 0.4025,
+ "step": 166800
+ },
+ {
+ "epoch": 0.8608458319789909,
+ "grad_norm": 25014.044921875,
+ "learning_rate": 1.6989794091624138e-05,
+ "loss": 0.4037,
+ "step": 166850
+ },
+ {
+ "epoch": 0.8611038019616037,
+ "grad_norm": 23370.162109375,
+ "learning_rate": 1.696236561373839e-05,
+ "loss": 0.4043,
+ "step": 166900
+ },
+ {
+ "epoch": 0.8613617719442166,
+ "grad_norm": 25212.830078125,
+ "learning_rate": 1.693495477070217e-05,
+ "loss": 0.3997,
+ "step": 166950
+ },
+ {
+ "epoch": 0.8616197419268294,
+ "grad_norm": 22828.701171875,
+ "learning_rate": 1.69075615771468e-05,
+ "loss": 0.4063,
+ "step": 167000
+ },
+ {
+ "epoch": 0.8618777119094422,
+ "grad_norm": 23862.4375,
+ "learning_rate": 1.6880186047694274e-05,
+ "loss": 0.4044,
+ "step": 167050
+ },
+ {
+ "epoch": 0.862135681892055,
+ "grad_norm": 25248.44140625,
+ "learning_rate": 1.685282819695711e-05,
+ "loss": 0.4072,
+ "step": 167100
+ },
+ {
+ "epoch": 0.8623936518746679,
+ "grad_norm": 24765.2421875,
+ "learning_rate": 1.68254880395384e-05,
+ "loss": 0.4055,
+ "step": 167150
+ },
+ {
+ "epoch": 0.8626516218572807,
+ "grad_norm": 22687.32421875,
+ "learning_rate": 1.6798165590031783e-05,
+ "loss": 0.4076,
+ "step": 167200
+ },
+ {
+ "epoch": 0.8629095918398935,
+ "grad_norm": 28427.16015625,
+ "learning_rate": 1.677086086302146e-05,
+ "loss": 0.3985,
+ "step": 167250
+ },
+ {
+ "epoch": 0.8631675618225063,
+ "grad_norm": 24114.146484375,
+ "learning_rate": 1.6743573873082147e-05,
+ "loss": 0.3993,
+ "step": 167300
+ },
+ {
+ "epoch": 0.8634255318051192,
+ "grad_norm": 22007.857421875,
+ "learning_rate": 1.6716304634779144e-05,
+ "loss": 0.4054,
+ "step": 167350
+ },
+ {
+ "epoch": 0.863683501787732,
+ "grad_norm": 24888.619140625,
+ "learning_rate": 1.6689053162668226e-05,
+ "loss": 0.3983,
+ "step": 167400
+ },
+ {
+ "epoch": 0.8639414717703447,
+ "grad_norm": 23306.1640625,
+ "learning_rate": 1.6661819471295704e-05,
+ "loss": 0.3985,
+ "step": 167450
+ },
+ {
+ "epoch": 0.8641994417529576,
+ "grad_norm": 25983.62109375,
+ "learning_rate": 1.6634603575198387e-05,
+ "loss": 0.4033,
+ "step": 167500
+ },
+ {
+ "epoch": 0.8644574117355704,
+ "grad_norm": 21851.826171875,
+ "learning_rate": 1.6607405488903582e-05,
+ "loss": 0.4067,
+ "step": 167550
+ },
+ {
+ "epoch": 0.8647153817181833,
+ "grad_norm": 23041.548828125,
+ "learning_rate": 1.6580225226929152e-05,
+ "loss": 0.4054,
+ "step": 167600
+ },
+ {
+ "epoch": 0.8649733517007961,
+ "grad_norm": 24893.72265625,
+ "learning_rate": 1.655306280378333e-05,
+ "loss": 0.4081,
+ "step": 167650
+ },
+ {
+ "epoch": 0.8652313216834089,
+ "grad_norm": 24462.869140625,
+ "learning_rate": 1.6525918233964933e-05,
+ "loss": 0.4093,
+ "step": 167700
+ },
+ {
+ "epoch": 0.8654892916660217,
+ "grad_norm": 20188.037109375,
+ "learning_rate": 1.6498791531963197e-05,
+ "loss": 0.3986,
+ "step": 167750
+ },
+ {
+ "epoch": 0.8657472616486346,
+ "grad_norm": 24806.51171875,
+ "learning_rate": 1.6471682712257812e-05,
+ "loss": 0.3988,
+ "step": 167800
+ },
+ {
+ "epoch": 0.8660052316312474,
+ "grad_norm": 21647.11328125,
+ "learning_rate": 1.6444591789318992e-05,
+ "loss": 0.4083,
+ "step": 167850
+ },
+ {
+ "epoch": 0.8662632016138602,
+ "grad_norm": 22894.3515625,
+ "learning_rate": 1.6417518777607277e-05,
+ "loss": 0.4004,
+ "step": 167900
+ },
+ {
+ "epoch": 0.866521171596473,
+ "grad_norm": 23173.974609375,
+ "learning_rate": 1.6390463691573765e-05,
+ "loss": 0.409,
+ "step": 167950
+ },
+ {
+ "epoch": 0.8667791415790859,
+ "grad_norm": 24268.001953125,
+ "learning_rate": 1.6363426545659927e-05,
+ "loss": 0.4021,
+ "step": 168000
+ },
+ {
+ "epoch": 0.8670371115616987,
+ "grad_norm": 23466.482421875,
+ "learning_rate": 1.6336407354297667e-05,
+ "loss": 0.4067,
+ "step": 168050
+ },
+ {
+ "epoch": 0.8672950815443115,
+ "grad_norm": 22965.560546875,
+ "learning_rate": 1.6309406131909298e-05,
+ "loss": 0.4127,
+ "step": 168100
+ },
+ {
+ "epoch": 0.8675530515269243,
+ "grad_norm": 22818.5859375,
+ "learning_rate": 1.6282422892907563e-05,
+ "loss": 0.4107,
+ "step": 168150
+ },
+ {
+ "epoch": 0.8678110215095372,
+ "grad_norm": 23358.80859375,
+ "learning_rate": 1.6255457651695565e-05,
+ "loss": 0.3985,
+ "step": 168200
+ },
+ {
+ "epoch": 0.86806899149215,
+ "grad_norm": 24952.044921875,
+ "learning_rate": 1.6228510422666865e-05,
+ "loss": 0.4021,
+ "step": 168250
+ },
+ {
+ "epoch": 0.8683269614747628,
+ "grad_norm": 23554.359375,
+ "learning_rate": 1.6201581220205353e-05,
+ "loss": 0.4091,
+ "step": 168300
+ },
+ {
+ "epoch": 0.8685849314573756,
+ "grad_norm": 23862.92578125,
+ "learning_rate": 1.6174670058685316e-05,
+ "loss": 0.4009,
+ "step": 168350
+ },
+ {
+ "epoch": 0.8688429014399884,
+ "grad_norm": 23549.693359375,
+ "learning_rate": 1.6147776952471415e-05,
+ "loss": 0.4062,
+ "step": 168400
+ },
+ {
+ "epoch": 0.8691008714226013,
+ "grad_norm": 25237.26953125,
+ "learning_rate": 1.612090191591865e-05,
+ "loss": 0.4009,
+ "step": 168450
+ },
+ {
+ "epoch": 0.8693588414052141,
+ "grad_norm": 24368.298828125,
+ "learning_rate": 1.6094044963372444e-05,
+ "loss": 0.4052,
+ "step": 168500
+ },
+ {
+ "epoch": 0.8696168113878269,
+ "grad_norm": 24438.0,
+ "learning_rate": 1.6067206109168453e-05,
+ "loss": 0.4077,
+ "step": 168550
+ },
+ {
+ "epoch": 0.8698747813704397,
+ "grad_norm": 30002.744140625,
+ "learning_rate": 1.6040385367632786e-05,
+ "loss": 0.4029,
+ "step": 168600
+ },
+ {
+ "epoch": 0.8701327513530526,
+ "grad_norm": 24591.333984375,
+ "learning_rate": 1.6013582753081824e-05,
+ "loss": 0.4019,
+ "step": 168650
+ },
+ {
+ "epoch": 0.8703907213356654,
+ "grad_norm": 24005.166015625,
+ "learning_rate": 1.5986798279822263e-05,
+ "loss": 0.4046,
+ "step": 168700
+ },
+ {
+ "epoch": 0.8706486913182782,
+ "grad_norm": 22198.482421875,
+ "learning_rate": 1.5960031962151167e-05,
+ "loss": 0.4003,
+ "step": 168750
+ },
+ {
+ "epoch": 0.870906661300891,
+ "grad_norm": 23392.919921875,
+ "learning_rate": 1.5933283814355872e-05,
+ "loss": 0.4039,
+ "step": 168800
+ },
+ {
+ "epoch": 0.8711646312835039,
+ "grad_norm": 26185.88671875,
+ "learning_rate": 1.5906553850714003e-05,
+ "loss": 0.4044,
+ "step": 168850
+ },
+ {
+ "epoch": 0.8714226012661167,
+ "grad_norm": 34066.59765625,
+ "learning_rate": 1.5879842085493514e-05,
+ "loss": 0.4068,
+ "step": 168900
+ },
+ {
+ "epoch": 0.8716805712487296,
+ "grad_norm": 21913.802734375,
+ "learning_rate": 1.5853148532952616e-05,
+ "loss": 0.4083,
+ "step": 168950
+ },
+ {
+ "epoch": 0.8719385412313423,
+ "grad_norm": 22491.25390625,
+ "learning_rate": 1.5826473207339802e-05,
+ "loss": 0.4037,
+ "step": 169000
+ },
+ {
+ "epoch": 0.8721965112139551,
+ "grad_norm": 23891.447265625,
+ "learning_rate": 1.579981612289389e-05,
+ "loss": 0.4033,
+ "step": 169050
+ },
+ {
+ "epoch": 0.872454481196568,
+ "grad_norm": 24374.109375,
+ "learning_rate": 1.5773177293843855e-05,
+ "loss": 0.41,
+ "step": 169100
+ },
+ {
+ "epoch": 0.8727124511791808,
+ "grad_norm": 24323.197265625,
+ "learning_rate": 1.574655673440903e-05,
+ "loss": 0.3999,
+ "step": 169150
+ },
+ {
+ "epoch": 0.8729704211617936,
+ "grad_norm": 22040.76171875,
+ "learning_rate": 1.5719954458798943e-05,
+ "loss": 0.3997,
+ "step": 169200
+ },
+ {
+ "epoch": 0.8732283911444064,
+ "grad_norm": 32067.173828125,
+ "learning_rate": 1.5693370481213355e-05,
+ "loss": 0.4028,
+ "step": 169250
+ },
+ {
+ "epoch": 0.8734863611270193,
+ "grad_norm": 27840.97265625,
+ "learning_rate": 1.5666804815842322e-05,
+ "loss": 0.4082,
+ "step": 169300
+ },
+ {
+ "epoch": 0.8737443311096321,
+ "grad_norm": 23976.154296875,
+ "learning_rate": 1.5640257476866033e-05,
+ "loss": 0.4075,
+ "step": 169350
+ },
+ {
+ "epoch": 0.8740023010922449,
+ "grad_norm": 22856.724609375,
+ "learning_rate": 1.5613728478454976e-05,
+ "loss": 0.4033,
+ "step": 169400
+ },
+ {
+ "epoch": 0.8742602710748577,
+ "grad_norm": 22639.69140625,
+ "learning_rate": 1.5587217834769803e-05,
+ "loss": 0.4052,
+ "step": 169450
+ },
+ {
+ "epoch": 0.8745182410574706,
+ "grad_norm": 24272.626953125,
+ "learning_rate": 1.5560725559961386e-05,
+ "loss": 0.4029,
+ "step": 169500
+ },
+ {
+ "epoch": 0.8747762110400834,
+ "grad_norm": 23789.333984375,
+ "learning_rate": 1.553425166817079e-05,
+ "loss": 0.4078,
+ "step": 169550
+ },
+ {
+ "epoch": 0.8750341810226961,
+ "grad_norm": 23287.294921875,
+ "learning_rate": 1.5507796173529248e-05,
+ "loss": 0.408,
+ "step": 169600
+ },
+ {
+ "epoch": 0.875292151005309,
+ "grad_norm": 22272.13671875,
+ "learning_rate": 1.548135909015822e-05,
+ "loss": 0.4017,
+ "step": 169650
+ },
+ {
+ "epoch": 0.8755501209879218,
+ "grad_norm": 24645.40234375,
+ "learning_rate": 1.5454940432169297e-05,
+ "loss": 0.4001,
+ "step": 169700
+ },
+ {
+ "epoch": 0.8758080909705347,
+ "grad_norm": 26364.072265625,
+ "learning_rate": 1.5428540213664243e-05,
+ "loss": 0.411,
+ "step": 169750
+ },
+ {
+ "epoch": 0.8760660609531475,
+ "grad_norm": 24535.76171875,
+ "learning_rate": 1.5402158448734987e-05,
+ "loss": 0.4042,
+ "step": 169800
+ },
+ {
+ "epoch": 0.8763240309357603,
+ "grad_norm": 23294.94140625,
+ "learning_rate": 1.53757951514636e-05,
+ "loss": 0.4083,
+ "step": 169850
+ },
+ {
+ "epoch": 0.8765820009183731,
+ "grad_norm": 23390.046875,
+ "learning_rate": 1.5349450335922295e-05,
+ "loss": 0.399,
+ "step": 169900
+ },
+ {
+ "epoch": 0.876839970900986,
+ "grad_norm": 23079.41796875,
+ "learning_rate": 1.5323124016173455e-05,
+ "loss": 0.4078,
+ "step": 169950
+ },
+ {
+ "epoch": 0.8770979408835988,
+ "grad_norm": 24190.23046875,
+ "learning_rate": 1.529681620626951e-05,
+ "loss": 0.4013,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8770979408835988,
+ "eval_loss": 0.39030978083610535,
+ "eval_runtime": 3197.421,
+ "eval_samples_per_second": 969.882,
+ "eval_steps_per_second": 1.894,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8773559108662116,
+ "grad_norm": 24830.658203125,
+ "learning_rate": 1.5270526920253098e-05,
+ "loss": 0.4053,
+ "step": 170050
+ },
+ {
+ "epoch": 0.8776138808488244,
+ "grad_norm": 21314.533203125,
+ "learning_rate": 1.5244256172156923e-05,
+ "loss": 0.4067,
+ "step": 170100
+ },
+ {
+ "epoch": 0.8778718508314373,
+ "grad_norm": 23271.314453125,
+ "learning_rate": 1.521800397600378e-05,
+ "loss": 0.4024,
+ "step": 170150
+ },
+ {
+ "epoch": 0.8781298208140501,
+ "grad_norm": 20112.265625,
+ "learning_rate": 1.5191770345806632e-05,
+ "loss": 0.4001,
+ "step": 170200
+ },
+ {
+ "epoch": 0.8783877907966628,
+ "grad_norm": 23957.087890625,
+ "learning_rate": 1.5165555295568418e-05,
+ "loss": 0.406,
+ "step": 170250
+ },
+ {
+ "epoch": 0.8786457607792757,
+ "grad_norm": 23699.181640625,
+ "learning_rate": 1.5139358839282275e-05,
+ "loss": 0.4005,
+ "step": 170300
+ },
+ {
+ "epoch": 0.8789037307618885,
+ "grad_norm": 23276.4453125,
+ "learning_rate": 1.5113180990931353e-05,
+ "loss": 0.4057,
+ "step": 170350
+ },
+ {
+ "epoch": 0.8791617007445014,
+ "grad_norm": 27051.26171875,
+ "learning_rate": 1.5087021764488867e-05,
+ "loss": 0.4037,
+ "step": 170400
+ },
+ {
+ "epoch": 0.8794196707271142,
+ "grad_norm": 24315.11328125,
+ "learning_rate": 1.5060881173918112e-05,
+ "loss": 0.4004,
+ "step": 170450
+ },
+ {
+ "epoch": 0.879677640709727,
+ "grad_norm": 22589.85546875,
+ "learning_rate": 1.5034759233172419e-05,
+ "loss": 0.402,
+ "step": 170500
+ },
+ {
+ "epoch": 0.8799356106923398,
+ "grad_norm": 24601.666015625,
+ "learning_rate": 1.5008655956195195e-05,
+ "loss": 0.4083,
+ "step": 170550
+ },
+ {
+ "epoch": 0.8801935806749527,
+ "grad_norm": 23203.884765625,
+ "learning_rate": 1.4982571356919862e-05,
+ "loss": 0.3971,
+ "step": 170600
+ },
+ {
+ "epoch": 0.8804515506575655,
+ "grad_norm": 28701.162109375,
+ "learning_rate": 1.4956505449269858e-05,
+ "loss": 0.3989,
+ "step": 170650
+ },
+ {
+ "epoch": 0.8807095206401783,
+ "grad_norm": 23548.541015625,
+ "learning_rate": 1.4930458247158668e-05,
+ "loss": 0.4014,
+ "step": 170700
+ },
+ {
+ "epoch": 0.8809674906227911,
+ "grad_norm": 26836.626953125,
+ "learning_rate": 1.4904429764489792e-05,
+ "loss": 0.3964,
+ "step": 170750
+ },
+ {
+ "epoch": 0.881225460605404,
+ "grad_norm": 23989.537109375,
+ "learning_rate": 1.4878420015156697e-05,
+ "loss": 0.4062,
+ "step": 170800
+ },
+ {
+ "epoch": 0.8814834305880168,
+ "grad_norm": 22008.498046875,
+ "learning_rate": 1.4852429013042945e-05,
+ "loss": 0.4034,
+ "step": 170850
+ },
+ {
+ "epoch": 0.8817414005706296,
+ "grad_norm": 22564.548828125,
+ "learning_rate": 1.4826456772021957e-05,
+ "loss": 0.3953,
+ "step": 170900
+ },
+ {
+ "epoch": 0.8819993705532424,
+ "grad_norm": 20611.005859375,
+ "learning_rate": 1.4800503305957264e-05,
+ "loss": 0.3993,
+ "step": 170950
+ },
+ {
+ "epoch": 0.8822573405358553,
+ "grad_norm": 23731.072265625,
+ "learning_rate": 1.4774568628702312e-05,
+ "loss": 0.4008,
+ "step": 171000
+ },
+ {
+ "epoch": 0.8825153105184681,
+ "grad_norm": 23515.265625,
+ "learning_rate": 1.4748652754100506e-05,
+ "loss": 0.4093,
+ "step": 171050
+ },
+ {
+ "epoch": 0.882773280501081,
+ "grad_norm": 20889.193359375,
+ "learning_rate": 1.4722755695985291e-05,
+ "loss": 0.4036,
+ "step": 171100
+ },
+ {
+ "epoch": 0.8830312504836937,
+ "grad_norm": 23561.208984375,
+ "learning_rate": 1.4696877468179954e-05,
+ "loss": 0.4009,
+ "step": 171150
+ },
+ {
+ "epoch": 0.8832892204663065,
+ "grad_norm": 29216.3046875,
+ "learning_rate": 1.4671018084497828e-05,
+ "loss": 0.4087,
+ "step": 171200
+ },
+ {
+ "epoch": 0.8835471904489194,
+ "grad_norm": 24697.615234375,
+ "learning_rate": 1.4645177558742147e-05,
+ "loss": 0.3976,
+ "step": 171250
+ },
+ {
+ "epoch": 0.8838051604315322,
+ "grad_norm": 30338.123046875,
+ "learning_rate": 1.4619355904706062e-05,
+ "loss": 0.4046,
+ "step": 171300
+ },
+ {
+ "epoch": 0.884063130414145,
+ "grad_norm": 22565.310546875,
+ "learning_rate": 1.4593553136172705e-05,
+ "loss": 0.4011,
+ "step": 171350
+ },
+ {
+ "epoch": 0.8843211003967578,
+ "grad_norm": 23498.0,
+ "learning_rate": 1.4567769266915077e-05,
+ "loss": 0.4071,
+ "step": 171400
+ },
+ {
+ "epoch": 0.8845790703793707,
+ "grad_norm": 23772.279296875,
+ "learning_rate": 1.4542004310696112e-05,
+ "loss": 0.4048,
+ "step": 171450
+ },
+ {
+ "epoch": 0.8848370403619835,
+ "grad_norm": 22418.015625,
+ "learning_rate": 1.4516258281268636e-05,
+ "loss": 0.4009,
+ "step": 171500
+ },
+ {
+ "epoch": 0.8850950103445963,
+ "grad_norm": 25706.166015625,
+ "learning_rate": 1.4490531192375395e-05,
+ "loss": 0.4017,
+ "step": 171550
+ },
+ {
+ "epoch": 0.8853529803272091,
+ "grad_norm": 23563.37890625,
+ "learning_rate": 1.4464823057748982e-05,
+ "loss": 0.4056,
+ "step": 171600
+ },
+ {
+ "epoch": 0.885610950309822,
+ "grad_norm": 23104.65234375,
+ "learning_rate": 1.4439133891111956e-05,
+ "loss": 0.4014,
+ "step": 171650
+ },
+ {
+ "epoch": 0.8858689202924348,
+ "grad_norm": 22858.935546875,
+ "learning_rate": 1.4413463706176627e-05,
+ "loss": 0.4047,
+ "step": 171700
+ },
+ {
+ "epoch": 0.8861268902750475,
+ "grad_norm": 23197.859375,
+ "learning_rate": 1.4387812516645299e-05,
+ "loss": 0.4032,
+ "step": 171750
+ },
+ {
+ "epoch": 0.8863848602576604,
+ "grad_norm": 22323.4609375,
+ "learning_rate": 1.4362180336210057e-05,
+ "loss": 0.4018,
+ "step": 171800
+ },
+ {
+ "epoch": 0.8866428302402732,
+ "grad_norm": 23677.431640625,
+ "learning_rate": 1.433656717855285e-05,
+ "loss": 0.4057,
+ "step": 171850
+ },
+ {
+ "epoch": 0.8869008002228861,
+ "grad_norm": 22975.283203125,
+ "learning_rate": 1.4310973057345538e-05,
+ "loss": 0.4077,
+ "step": 171900
+ },
+ {
+ "epoch": 0.8871587702054989,
+ "grad_norm": 23338.005859375,
+ "learning_rate": 1.4285397986249694e-05,
+ "loss": 0.4037,
+ "step": 171950
+ },
+ {
+ "epoch": 0.8874167401881117,
+ "grad_norm": 22469.08203125,
+ "learning_rate": 1.4259841978916849e-05,
+ "loss": 0.4025,
+ "step": 172000
+ },
+ {
+ "epoch": 0.8876747101707245,
+ "grad_norm": 23508.064453125,
+ "learning_rate": 1.4234305048988288e-05,
+ "loss": 0.3979,
+ "step": 172050
+ },
+ {
+ "epoch": 0.8879326801533374,
+ "grad_norm": 25113.62890625,
+ "learning_rate": 1.4208787210095126e-05,
+ "loss": 0.3988,
+ "step": 172100
+ },
+ {
+ "epoch": 0.8881906501359502,
+ "grad_norm": 23230.75,
+ "learning_rate": 1.4183288475858298e-05,
+ "loss": 0.4029,
+ "step": 172150
+ },
+ {
+ "epoch": 0.888448620118563,
+ "grad_norm": 22058.306640625,
+ "learning_rate": 1.4157808859888516e-05,
+ "loss": 0.4082,
+ "step": 172200
+ },
+ {
+ "epoch": 0.8887065901011758,
+ "grad_norm": 23375.91015625,
+ "learning_rate": 1.4132348375786336e-05,
+ "loss": 0.407,
+ "step": 172250
+ },
+ {
+ "epoch": 0.8889645600837887,
+ "grad_norm": 21199.943359375,
+ "learning_rate": 1.4106907037142059e-05,
+ "loss": 0.4039,
+ "step": 172300
+ },
+ {
+ "epoch": 0.8892225300664015,
+ "grad_norm": 22754.287109375,
+ "learning_rate": 1.4081484857535777e-05,
+ "loss": 0.4,
+ "step": 172350
+ },
+ {
+ "epoch": 0.8894805000490142,
+ "grad_norm": 23116.21484375,
+ "learning_rate": 1.405608185053735e-05,
+ "loss": 0.4026,
+ "step": 172400
+ },
+ {
+ "epoch": 0.8897384700316271,
+ "grad_norm": 22281.65625,
+ "learning_rate": 1.4030698029706423e-05,
+ "loss": 0.3992,
+ "step": 172450
+ },
+ {
+ "epoch": 0.8899964400142399,
+ "grad_norm": 22979.447265625,
+ "learning_rate": 1.400533340859237e-05,
+ "loss": 0.4027,
+ "step": 172500
+ },
+ {
+ "epoch": 0.8902544099968528,
+ "grad_norm": 25733.873046875,
+ "learning_rate": 1.3979988000734373e-05,
+ "loss": 0.4092,
+ "step": 172550
+ },
+ {
+ "epoch": 0.8905123799794656,
+ "grad_norm": 23825.38671875,
+ "learning_rate": 1.395466181966127e-05,
+ "loss": 0.3997,
+ "step": 172600
+ },
+ {
+ "epoch": 0.8907703499620784,
+ "grad_norm": 27504.0703125,
+ "learning_rate": 1.3929354878891715e-05,
+ "loss": 0.403,
+ "step": 172650
+ },
+ {
+ "epoch": 0.8910283199446912,
+ "grad_norm": 28201.208984375,
+ "learning_rate": 1.3904067191934067e-05,
+ "loss": 0.4029,
+ "step": 172700
+ },
+ {
+ "epoch": 0.8912862899273041,
+ "grad_norm": 24115.69140625,
+ "learning_rate": 1.3878798772286377e-05,
+ "loss": 0.3979,
+ "step": 172750
+ },
+ {
+ "epoch": 0.8915442599099169,
+ "grad_norm": 20489.552734375,
+ "learning_rate": 1.3853549633436491e-05,
+ "loss": 0.4001,
+ "step": 172800
+ },
+ {
+ "epoch": 0.8918022298925297,
+ "grad_norm": 23580.583984375,
+ "learning_rate": 1.3828319788861838e-05,
+ "loss": 0.3983,
+ "step": 172850
+ },
+ {
+ "epoch": 0.8920601998751425,
+ "grad_norm": 24172.771484375,
+ "learning_rate": 1.3803109252029678e-05,
+ "loss": 0.4081,
+ "step": 172900
+ },
+ {
+ "epoch": 0.8923181698577554,
+ "grad_norm": 26543.375,
+ "learning_rate": 1.3777918036396887e-05,
+ "loss": 0.4015,
+ "step": 172950
+ },
+ {
+ "epoch": 0.8925761398403682,
+ "grad_norm": 27849.654296875,
+ "learning_rate": 1.3752746155410046e-05,
+ "loss": 0.4045,
+ "step": 173000
+ },
+ {
+ "epoch": 0.892834109822981,
+ "grad_norm": 25752.724609375,
+ "learning_rate": 1.3727593622505424e-05,
+ "loss": 0.4022,
+ "step": 173050
+ },
+ {
+ "epoch": 0.8930920798055938,
+ "grad_norm": 22836.892578125,
+ "learning_rate": 1.3702460451108935e-05,
+ "loss": 0.4015,
+ "step": 173100
+ },
+ {
+ "epoch": 0.8933500497882066,
+ "grad_norm": 26556.62890625,
+ "learning_rate": 1.3677346654636208e-05,
+ "loss": 0.4017,
+ "step": 173150
+ },
+ {
+ "epoch": 0.8936080197708195,
+ "grad_norm": 24310.390625,
+ "learning_rate": 1.3652252246492492e-05,
+ "loss": 0.4015,
+ "step": 173200
+ },
+ {
+ "epoch": 0.8938659897534323,
+ "grad_norm": 23713.0859375,
+ "learning_rate": 1.3627177240072698e-05,
+ "loss": 0.4024,
+ "step": 173250
+ },
+ {
+ "epoch": 0.8941239597360451,
+ "grad_norm": 21189.57421875,
+ "learning_rate": 1.3602121648761373e-05,
+ "loss": 0.4012,
+ "step": 173300
+ },
+ {
+ "epoch": 0.8943819297186579,
+ "grad_norm": 24229.1484375,
+ "learning_rate": 1.3577085485932705e-05,
+ "loss": 0.4105,
+ "step": 173350
+ },
+ {
+ "epoch": 0.8946398997012708,
+ "grad_norm": 23998.22265625,
+ "learning_rate": 1.3552068764950504e-05,
+ "loss": 0.4004,
+ "step": 173400
+ },
+ {
+ "epoch": 0.8948978696838836,
+ "grad_norm": 24751.1171875,
+ "learning_rate": 1.3527071499168253e-05,
+ "loss": 0.4024,
+ "step": 173450
+ },
+ {
+ "epoch": 0.8951558396664964,
+ "grad_norm": 24872.029296875,
+ "learning_rate": 1.3502093701928948e-05,
+ "loss": 0.406,
+ "step": 173500
+ },
+ {
+ "epoch": 0.8954138096491092,
+ "grad_norm": 23180.771484375,
+ "learning_rate": 1.3477135386565297e-05,
+ "loss": 0.4041,
+ "step": 173550
+ },
+ {
+ "epoch": 0.8956717796317221,
+ "grad_norm": 23679.1484375,
+ "learning_rate": 1.3452196566399555e-05,
+ "loss": 0.4095,
+ "step": 173600
+ },
+ {
+ "epoch": 0.8959297496143349,
+ "grad_norm": 26730.537109375,
+ "learning_rate": 1.3427277254743565e-05,
+ "loss": 0.4058,
+ "step": 173650
+ },
+ {
+ "epoch": 0.8961877195969477,
+ "grad_norm": 23320.666015625,
+ "learning_rate": 1.3402377464898813e-05,
+ "loss": 0.4038,
+ "step": 173700
+ },
+ {
+ "epoch": 0.8964456895795605,
+ "grad_norm": 22802.87890625,
+ "learning_rate": 1.3377497210156276e-05,
+ "loss": 0.3977,
+ "step": 173750
+ },
+ {
+ "epoch": 0.8967036595621733,
+ "grad_norm": 21257.22265625,
+ "learning_rate": 1.3352636503796584e-05,
+ "loss": 0.4074,
+ "step": 173800
+ },
+ {
+ "epoch": 0.8969616295447862,
+ "grad_norm": 23935.412109375,
+ "learning_rate": 1.332779535908989e-05,
+ "loss": 0.4021,
+ "step": 173850
+ },
+ {
+ "epoch": 0.8972195995273989,
+ "grad_norm": 21819.267578125,
+ "learning_rate": 1.3302973789295925e-05,
+ "loss": 0.3992,
+ "step": 173900
+ },
+ {
+ "epoch": 0.8974775695100118,
+ "grad_norm": 23360.71875,
+ "learning_rate": 1.327817180766393e-05,
+ "loss": 0.4051,
+ "step": 173950
+ },
+ {
+ "epoch": 0.8977355394926246,
+ "grad_norm": 24474.685546875,
+ "learning_rate": 1.3253389427432772e-05,
+ "loss": 0.4046,
+ "step": 174000
+ },
+ {
+ "epoch": 0.8979935094752375,
+ "grad_norm": 29715.3359375,
+ "learning_rate": 1.3228626661830779e-05,
+ "loss": 0.4037,
+ "step": 174050
+ },
+ {
+ "epoch": 0.8982514794578503,
+ "grad_norm": 23241.20703125,
+ "learning_rate": 1.3203883524075833e-05,
+ "loss": 0.4003,
+ "step": 174100
+ },
+ {
+ "epoch": 0.8985094494404631,
+ "grad_norm": 26005.23828125,
+ "learning_rate": 1.3179160027375347e-05,
+ "loss": 0.3992,
+ "step": 174150
+ },
+ {
+ "epoch": 0.8987674194230759,
+ "grad_norm": 23322.212890625,
+ "learning_rate": 1.3154456184926234e-05,
+ "loss": 0.4037,
+ "step": 174200
+ },
+ {
+ "epoch": 0.8990253894056888,
+ "grad_norm": 22434.90234375,
+ "learning_rate": 1.3129772009914964e-05,
+ "loss": 0.4044,
+ "step": 174250
+ },
+ {
+ "epoch": 0.8992833593883016,
+ "grad_norm": 24753.904296875,
+ "learning_rate": 1.3105107515517418e-05,
+ "loss": 0.4034,
+ "step": 174300
+ },
+ {
+ "epoch": 0.8995413293709144,
+ "grad_norm": 23271.814453125,
+ "learning_rate": 1.3080462714899066e-05,
+ "loss": 0.3992,
+ "step": 174350
+ },
+ {
+ "epoch": 0.8997992993535272,
+ "grad_norm": 23929.7578125,
+ "learning_rate": 1.3055837621214811e-05,
+ "loss": 0.4018,
+ "step": 174400
+ },
+ {
+ "epoch": 0.90005726933614,
+ "grad_norm": 25211.7265625,
+ "learning_rate": 1.3031232247609037e-05,
+ "loss": 0.4052,
+ "step": 174450
+ },
+ {
+ "epoch": 0.9003152393187529,
+ "grad_norm": 24554.791015625,
+ "learning_rate": 1.300664660721566e-05,
+ "loss": 0.3987,
+ "step": 174500
+ },
+ {
+ "epoch": 0.9005732093013656,
+ "grad_norm": 26028.396484375,
+ "learning_rate": 1.2982080713157963e-05,
+ "loss": 0.4032,
+ "step": 174550
+ },
+ {
+ "epoch": 0.9008311792839785,
+ "grad_norm": 24228.72265625,
+ "learning_rate": 1.295753457854878e-05,
+ "loss": 0.4001,
+ "step": 174600
+ },
+ {
+ "epoch": 0.9010891492665913,
+ "grad_norm": 24043.064453125,
+ "learning_rate": 1.293300821649036e-05,
+ "loss": 0.4009,
+ "step": 174650
+ },
+ {
+ "epoch": 0.9013471192492042,
+ "grad_norm": 25628.208984375,
+ "learning_rate": 1.2908501640074388e-05,
+ "loss": 0.4058,
+ "step": 174700
+ },
+ {
+ "epoch": 0.901605089231817,
+ "grad_norm": 23927.81640625,
+ "learning_rate": 1.288401486238201e-05,
+ "loss": 0.4044,
+ "step": 174750
+ },
+ {
+ "epoch": 0.9018630592144298,
+ "grad_norm": 23615.923828125,
+ "learning_rate": 1.2859547896483793e-05,
+ "loss": 0.4042,
+ "step": 174800
+ },
+ {
+ "epoch": 0.9021210291970426,
+ "grad_norm": 24990.158203125,
+ "learning_rate": 1.2835100755439705e-05,
+ "loss": 0.4033,
+ "step": 174850
+ },
+ {
+ "epoch": 0.9023789991796555,
+ "grad_norm": 23908.240234375,
+ "learning_rate": 1.2810673452299194e-05,
+ "loss": 0.404,
+ "step": 174900
+ },
+ {
+ "epoch": 0.9026369691622683,
+ "grad_norm": 24776.828125,
+ "learning_rate": 1.278626600010106e-05,
+ "loss": 0.4017,
+ "step": 174950
+ },
+ {
+ "epoch": 0.9028949391448811,
+ "grad_norm": 23400.912109375,
+ "learning_rate": 1.276187841187354e-05,
+ "loss": 0.4007,
+ "step": 175000
+ },
+ {
+ "epoch": 0.9028949391448811,
+ "eval_loss": 0.389443963766098,
+ "eval_runtime": 3184.6844,
+ "eval_samples_per_second": 973.761,
+ "eval_steps_per_second": 1.902,
+ "step": 175000
+ },
+ {
+ "epoch": 0.9031529091274939,
+ "grad_norm": 23482.337890625,
+ "learning_rate": 1.2737510700634248e-05,
+ "loss": 0.4033,
+ "step": 175050
+ },
+ {
+ "epoch": 0.9034108791101068,
+ "grad_norm": 24351.23828125,
+ "learning_rate": 1.2713162879390183e-05,
+ "loss": 0.4031,
+ "step": 175100
+ },
+ {
+ "epoch": 0.9036688490927196,
+ "grad_norm": 28495.6796875,
+ "learning_rate": 1.2688834961137785e-05,
+ "loss": 0.4057,
+ "step": 175150
+ },
+ {
+ "epoch": 0.9039268190753323,
+ "grad_norm": 23276.583984375,
+ "learning_rate": 1.2664526958862765e-05,
+ "loss": 0.4036,
+ "step": 175200
+ },
+ {
+ "epoch": 0.9041847890579452,
+ "grad_norm": 22784.033203125,
+ "learning_rate": 1.2640238885540312e-05,
+ "loss": 0.4054,
+ "step": 175250
+ },
+ {
+ "epoch": 0.904442759040558,
+ "grad_norm": 22389.21484375,
+ "learning_rate": 1.2615970754134914e-05,
+ "loss": 0.4036,
+ "step": 175300
+ },
+ {
+ "epoch": 0.9047007290231709,
+ "grad_norm": 24767.59375,
+ "learning_rate": 1.2591722577600412e-05,
+ "loss": 0.4055,
+ "step": 175350
+ },
+ {
+ "epoch": 0.9049586990057837,
+ "grad_norm": 24981.552734375,
+ "learning_rate": 1.2567494368880056e-05,
+ "loss": 0.3997,
+ "step": 175400
+ },
+ {
+ "epoch": 0.9052166689883965,
+ "grad_norm": 24523.580078125,
+ "learning_rate": 1.254328614090634e-05,
+ "loss": 0.4009,
+ "step": 175450
+ },
+ {
+ "epoch": 0.9054746389710093,
+ "grad_norm": 29571.404296875,
+ "learning_rate": 1.251909790660119e-05,
+ "loss": 0.4013,
+ "step": 175500
+ },
+ {
+ "epoch": 0.9057326089536222,
+ "grad_norm": 23286.564453125,
+ "learning_rate": 1.24949296788758e-05,
+ "loss": 0.3997,
+ "step": 175550
+ },
+ {
+ "epoch": 0.905990578936235,
+ "grad_norm": 23124.205078125,
+ "learning_rate": 1.247078147063071e-05,
+ "loss": 0.4056,
+ "step": 175600
+ },
+ {
+ "epoch": 0.9062485489188478,
+ "grad_norm": 23467.775390625,
+ "learning_rate": 1.2446653294755755e-05,
+ "loss": 0.3976,
+ "step": 175650
+ },
+ {
+ "epoch": 0.9065065189014606,
+ "grad_norm": 23793.609375,
+ "learning_rate": 1.2422545164130096e-05,
+ "loss": 0.4018,
+ "step": 175700
+ },
+ {
+ "epoch": 0.9067644888840735,
+ "grad_norm": 24439.974609375,
+ "learning_rate": 1.2398457091622167e-05,
+ "loss": 0.4063,
+ "step": 175750
+ },
+ {
+ "epoch": 0.9070224588666863,
+ "grad_norm": 23925.22265625,
+ "learning_rate": 1.2374389090089744e-05,
+ "loss": 0.4039,
+ "step": 175800
+ },
+ {
+ "epoch": 0.907280428849299,
+ "grad_norm": 23174.416015625,
+ "learning_rate": 1.2350341172379853e-05,
+ "loss": 0.4031,
+ "step": 175850
+ },
+ {
+ "epoch": 0.9075383988319119,
+ "grad_norm": 26669.806640625,
+ "learning_rate": 1.2326313351328794e-05,
+ "loss": 0.4031,
+ "step": 175900
+ },
+ {
+ "epoch": 0.9077963688145247,
+ "grad_norm": 21128.041015625,
+ "learning_rate": 1.2302305639762168e-05,
+ "loss": 0.407,
+ "step": 175950
+ },
+ {
+ "epoch": 0.9080543387971376,
+ "grad_norm": 22798.111328125,
+ "learning_rate": 1.2278318050494797e-05,
+ "loss": 0.4035,
+ "step": 176000
+ },
+ {
+ "epoch": 0.9083123087797504,
+ "grad_norm": 23327.587890625,
+ "learning_rate": 1.2254350596330843e-05,
+ "loss": 0.3958,
+ "step": 176050
+ },
+ {
+ "epoch": 0.9085702787623632,
+ "grad_norm": 22225.3125,
+ "learning_rate": 1.2230403290063613e-05,
+ "loss": 0.4074,
+ "step": 176100
+ },
+ {
+ "epoch": 0.908828248744976,
+ "grad_norm": 22727.791015625,
+ "learning_rate": 1.2206476144475754e-05,
+ "loss": 0.4063,
+ "step": 176150
+ },
+ {
+ "epoch": 0.9090862187275889,
+ "grad_norm": 26138.931640625,
+ "learning_rate": 1.2182569172339098e-05,
+ "loss": 0.408,
+ "step": 176200
+ },
+ {
+ "epoch": 0.9093441887102017,
+ "grad_norm": 23436.91796875,
+ "learning_rate": 1.2158682386414716e-05,
+ "loss": 0.4038,
+ "step": 176250
+ },
+ {
+ "epoch": 0.9096021586928145,
+ "grad_norm": 23695.244140625,
+ "learning_rate": 1.2134815799452947e-05,
+ "loss": 0.4074,
+ "step": 176300
+ },
+ {
+ "epoch": 0.9098601286754273,
+ "grad_norm": 25616.240234375,
+ "learning_rate": 1.2110969424193263e-05,
+ "loss": 0.3971,
+ "step": 176350
+ },
+ {
+ "epoch": 0.9101180986580402,
+ "grad_norm": 27326.634765625,
+ "learning_rate": 1.2087143273364431e-05,
+ "loss": 0.4045,
+ "step": 176400
+ },
+ {
+ "epoch": 0.910376068640653,
+ "grad_norm": 23704.775390625,
+ "learning_rate": 1.2063337359684384e-05,
+ "loss": 0.4071,
+ "step": 176450
+ },
+ {
+ "epoch": 0.9106340386232658,
+ "grad_norm": 25532.234375,
+ "learning_rate": 1.2039551695860251e-05,
+ "loss": 0.4021,
+ "step": 176500
+ },
+ {
+ "epoch": 0.9108920086058786,
+ "grad_norm": 25247.884765625,
+ "learning_rate": 1.201578629458835e-05,
+ "loss": 0.4074,
+ "step": 176550
+ },
+ {
+ "epoch": 0.9111499785884914,
+ "grad_norm": 29377.486328125,
+ "learning_rate": 1.1992041168554236e-05,
+ "loss": 0.4064,
+ "step": 176600
+ },
+ {
+ "epoch": 0.9114079485711043,
+ "grad_norm": 22188.34375,
+ "learning_rate": 1.1968316330432527e-05,
+ "loss": 0.404,
+ "step": 176650
+ },
+ {
+ "epoch": 0.911665918553717,
+ "grad_norm": 23766.0546875,
+ "learning_rate": 1.194461179288714e-05,
+ "loss": 0.4016,
+ "step": 176700
+ },
+ {
+ "epoch": 0.9119238885363299,
+ "grad_norm": 21386.623046875,
+ "learning_rate": 1.1920927568571078e-05,
+ "loss": 0.4055,
+ "step": 176750
+ },
+ {
+ "epoch": 0.9121818585189427,
+ "grad_norm": 25873.052734375,
+ "learning_rate": 1.1897263670126507e-05,
+ "loss": 0.3978,
+ "step": 176800
+ },
+ {
+ "epoch": 0.9124398285015556,
+ "grad_norm": 25235.5390625,
+ "learning_rate": 1.1873620110184803e-05,
+ "loss": 0.3975,
+ "step": 176850
+ },
+ {
+ "epoch": 0.9126977984841684,
+ "grad_norm": 22841.5,
+ "learning_rate": 1.1849996901366383e-05,
+ "loss": 0.4031,
+ "step": 176900
+ },
+ {
+ "epoch": 0.9129557684667812,
+ "grad_norm": 21522.388671875,
+ "learning_rate": 1.1826394056280893e-05,
+ "loss": 0.4048,
+ "step": 176950
+ },
+ {
+ "epoch": 0.913213738449394,
+ "grad_norm": 27600.689453125,
+ "learning_rate": 1.1802811587527074e-05,
+ "loss": 0.3984,
+ "step": 177000
+ },
+ {
+ "epoch": 0.9134717084320069,
+ "grad_norm": 24698.60546875,
+ "learning_rate": 1.177924950769278e-05,
+ "loss": 0.406,
+ "step": 177050
+ },
+ {
+ "epoch": 0.9137296784146197,
+ "grad_norm": 27378.033203125,
+ "learning_rate": 1.1755707829355001e-05,
+ "loss": 0.3993,
+ "step": 177100
+ },
+ {
+ "epoch": 0.9139876483972325,
+ "grad_norm": 27578.4296875,
+ "learning_rate": 1.1732186565079805e-05,
+ "loss": 0.3984,
+ "step": 177150
+ },
+ {
+ "epoch": 0.9142456183798453,
+ "grad_norm": 24650.6953125,
+ "learning_rate": 1.1708685727422424e-05,
+ "loss": 0.401,
+ "step": 177200
+ },
+ {
+ "epoch": 0.9145035883624582,
+ "grad_norm": 25550.0859375,
+ "learning_rate": 1.1685205328927135e-05,
+ "loss": 0.399,
+ "step": 177250
+ },
+ {
+ "epoch": 0.914761558345071,
+ "grad_norm": 22760.77734375,
+ "learning_rate": 1.166174538212732e-05,
+ "loss": 0.403,
+ "step": 177300
+ },
+ {
+ "epoch": 0.9150195283276837,
+ "grad_norm": 22038.26171875,
+ "learning_rate": 1.1638305899545443e-05,
+ "loss": 0.4066,
+ "step": 177350
+ },
+ {
+ "epoch": 0.9152774983102966,
+ "grad_norm": 23857.66015625,
+ "learning_rate": 1.1614886893693044e-05,
+ "loss": 0.4038,
+ "step": 177400
+ },
+ {
+ "epoch": 0.9155354682929094,
+ "grad_norm": 24813.55859375,
+ "learning_rate": 1.1591488377070724e-05,
+ "loss": 0.3992,
+ "step": 177450
+ },
+ {
+ "epoch": 0.9157934382755223,
+ "grad_norm": 24467.5859375,
+ "learning_rate": 1.1568110362168199e-05,
+ "loss": 0.4,
+ "step": 177500
+ },
+ {
+ "epoch": 0.9160514082581351,
+ "grad_norm": 22464.98046875,
+ "learning_rate": 1.1544752861464143e-05,
+ "loss": 0.4069,
+ "step": 177550
+ },
+ {
+ "epoch": 0.9163093782407479,
+ "grad_norm": 26591.51171875,
+ "learning_rate": 1.1521415887426379e-05,
+ "loss": 0.4008,
+ "step": 177600
+ },
+ {
+ "epoch": 0.9165673482233607,
+ "grad_norm": 21086.318359375,
+ "learning_rate": 1.1498099452511724e-05,
+ "loss": 0.4036,
+ "step": 177650
+ },
+ {
+ "epoch": 0.9168253182059736,
+ "grad_norm": 24243.072265625,
+ "learning_rate": 1.147480356916602e-05,
+ "loss": 0.4019,
+ "step": 177700
+ },
+ {
+ "epoch": 0.9170832881885864,
+ "grad_norm": 26714.83984375,
+ "learning_rate": 1.1451528249824206e-05,
+ "loss": 0.3978,
+ "step": 177750
+ },
+ {
+ "epoch": 0.9173412581711992,
+ "grad_norm": 24799.712890625,
+ "learning_rate": 1.1428273506910132e-05,
+ "loss": 0.4078,
+ "step": 177800
+ },
+ {
+ "epoch": 0.917599228153812,
+ "grad_norm": 25010.435546875,
+ "learning_rate": 1.1405039352836777e-05,
+ "loss": 0.4054,
+ "step": 177850
+ },
+ {
+ "epoch": 0.9178571981364249,
+ "grad_norm": 23657.78125,
+ "learning_rate": 1.1381825800006068e-05,
+ "loss": 0.4001,
+ "step": 177900
+ },
+ {
+ "epoch": 0.9181151681190377,
+ "grad_norm": 23865.349609375,
+ "learning_rate": 1.1358632860808955e-05,
+ "loss": 0.4012,
+ "step": 177950
+ },
+ {
+ "epoch": 0.9183731381016504,
+ "grad_norm": 26476.04296875,
+ "learning_rate": 1.1335460547625365e-05,
+ "loss": 0.3998,
+ "step": 178000
+ },
+ {
+ "epoch": 0.9186311080842633,
+ "grad_norm": 24907.89453125,
+ "learning_rate": 1.1312308872824235e-05,
+ "loss": 0.401,
+ "step": 178050
+ },
+ {
+ "epoch": 0.9188890780668761,
+ "grad_norm": 24008.54296875,
+ "learning_rate": 1.1289177848763494e-05,
+ "loss": 0.3991,
+ "step": 178100
+ },
+ {
+ "epoch": 0.919147048049489,
+ "grad_norm": 23814.396484375,
+ "learning_rate": 1.1266067487790027e-05,
+ "loss": 0.4039,
+ "step": 178150
+ },
+ {
+ "epoch": 0.9194050180321018,
+ "grad_norm": 25892.994140625,
+ "learning_rate": 1.1242977802239696e-05,
+ "loss": 0.4015,
+ "step": 178200
+ },
+ {
+ "epoch": 0.9196629880147146,
+ "grad_norm": 24185.7265625,
+ "learning_rate": 1.1219908804437328e-05,
+ "loss": 0.3992,
+ "step": 178250
+ },
+ {
+ "epoch": 0.9199209579973274,
+ "grad_norm": 23890.54296875,
+ "learning_rate": 1.1196860506696705e-05,
+ "loss": 0.4087,
+ "step": 178300
+ },
+ {
+ "epoch": 0.9201789279799403,
+ "grad_norm": 25288.83203125,
+ "learning_rate": 1.1173832921320554e-05,
+ "loss": 0.4038,
+ "step": 178350
+ },
+ {
+ "epoch": 0.9204368979625531,
+ "grad_norm": 27609.994140625,
+ "learning_rate": 1.1150826060600594e-05,
+ "loss": 0.4047,
+ "step": 178400
+ },
+ {
+ "epoch": 0.9206948679451659,
+ "grad_norm": 25010.259765625,
+ "learning_rate": 1.112783993681738e-05,
+ "loss": 0.4037,
+ "step": 178450
+ },
+ {
+ "epoch": 0.9209528379277787,
+ "grad_norm": 23663.78515625,
+ "learning_rate": 1.1104874562240514e-05,
+ "loss": 0.396,
+ "step": 178500
+ },
+ {
+ "epoch": 0.9212108079103916,
+ "grad_norm": 24960.072265625,
+ "learning_rate": 1.108192994912844e-05,
+ "loss": 0.4024,
+ "step": 178550
+ },
+ {
+ "epoch": 0.9214687778930044,
+ "grad_norm": 22778.66796875,
+ "learning_rate": 1.1059006109728543e-05,
+ "loss": 0.4039,
+ "step": 178600
+ },
+ {
+ "epoch": 0.9217267478756171,
+ "grad_norm": 20177.640625,
+ "learning_rate": 1.1036103056277165e-05,
+ "loss": 0.4008,
+ "step": 178650
+ },
+ {
+ "epoch": 0.92198471785823,
+ "grad_norm": 25084.703125,
+ "learning_rate": 1.1013220800999452e-05,
+ "loss": 0.4082,
+ "step": 178700
+ },
+ {
+ "epoch": 0.9222426878408428,
+ "grad_norm": 23697.529296875,
+ "learning_rate": 1.0990359356109558e-05,
+ "loss": 0.4083,
+ "step": 178750
+ },
+ {
+ "epoch": 0.9225006578234557,
+ "grad_norm": 26252.25,
+ "learning_rate": 1.0967518733810462e-05,
+ "loss": 0.4114,
+ "step": 178800
+ },
+ {
+ "epoch": 0.9227586278060684,
+ "grad_norm": 25295.103515625,
+ "learning_rate": 1.094469894629403e-05,
+ "loss": 0.4062,
+ "step": 178850
+ },
+ {
+ "epoch": 0.9230165977886813,
+ "grad_norm": 24484.203125,
+ "learning_rate": 1.0921900005741053e-05,
+ "loss": 0.4008,
+ "step": 178900
+ },
+ {
+ "epoch": 0.9232745677712941,
+ "grad_norm": 23360.701171875,
+ "learning_rate": 1.0899121924321154e-05,
+ "loss": 0.405,
+ "step": 178950
+ },
+ {
+ "epoch": 0.923532537753907,
+ "grad_norm": 22507.24609375,
+ "learning_rate": 1.0876364714192822e-05,
+ "loss": 0.3968,
+ "step": 179000
+ },
+ {
+ "epoch": 0.9237905077365198,
+ "grad_norm": 26761.66015625,
+ "learning_rate": 1.0853628387503423e-05,
+ "loss": 0.4021,
+ "step": 179050
+ },
+ {
+ "epoch": 0.9240484777191326,
+ "grad_norm": 26596.376953125,
+ "learning_rate": 1.0830912956389166e-05,
+ "loss": 0.3984,
+ "step": 179100
+ },
+ {
+ "epoch": 0.9243064477017454,
+ "grad_norm": 23996.490234375,
+ "learning_rate": 1.0808218432975093e-05,
+ "loss": 0.3996,
+ "step": 179150
+ },
+ {
+ "epoch": 0.9245644176843583,
+ "grad_norm": 22681.4609375,
+ "learning_rate": 1.0785544829375143e-05,
+ "loss": 0.4021,
+ "step": 179200
+ },
+ {
+ "epoch": 0.9248223876669711,
+ "grad_norm": 25675.728515625,
+ "learning_rate": 1.0762892157691995e-05,
+ "loss": 0.3942,
+ "step": 179250
+ },
+ {
+ "epoch": 0.9250803576495839,
+ "grad_norm": 26039.25,
+ "learning_rate": 1.0740260430017247e-05,
+ "loss": 0.4014,
+ "step": 179300
+ },
+ {
+ "epoch": 0.9253383276321967,
+ "grad_norm": 21596.50390625,
+ "learning_rate": 1.0717649658431256e-05,
+ "loss": 0.4017,
+ "step": 179350
+ },
+ {
+ "epoch": 0.9255962976148095,
+ "grad_norm": 25318.3125,
+ "learning_rate": 1.0695059855003204e-05,
+ "loss": 0.3968,
+ "step": 179400
+ },
+ {
+ "epoch": 0.9258542675974224,
+ "grad_norm": 20999.10546875,
+ "learning_rate": 1.0672491031791137e-05,
+ "loss": 0.4032,
+ "step": 179450
+ },
+ {
+ "epoch": 0.9261122375800351,
+ "grad_norm": 25034.404296875,
+ "learning_rate": 1.0649943200841794e-05,
+ "loss": 0.3987,
+ "step": 179500
+ },
+ {
+ "epoch": 0.926370207562648,
+ "grad_norm": 23470.205078125,
+ "learning_rate": 1.0627416374190819e-05,
+ "loss": 0.4009,
+ "step": 179550
+ },
+ {
+ "epoch": 0.9266281775452608,
+ "grad_norm": 23667.298828125,
+ "learning_rate": 1.0604910563862575e-05,
+ "loss": 0.4022,
+ "step": 179600
+ },
+ {
+ "epoch": 0.9268861475278737,
+ "grad_norm": 25315.5390625,
+ "learning_rate": 1.058242578187023e-05,
+ "loss": 0.4023,
+ "step": 179650
+ },
+ {
+ "epoch": 0.9271441175104865,
+ "grad_norm": 23639.34375,
+ "learning_rate": 1.0559962040215727e-05,
+ "loss": 0.407,
+ "step": 179700
+ },
+ {
+ "epoch": 0.9274020874930993,
+ "grad_norm": 29350.244140625,
+ "learning_rate": 1.0537519350889764e-05,
+ "loss": 0.4063,
+ "step": 179750
+ },
+ {
+ "epoch": 0.9276600574757121,
+ "grad_norm": 26077.30859375,
+ "learning_rate": 1.051509772587183e-05,
+ "loss": 0.4011,
+ "step": 179800
+ },
+ {
+ "epoch": 0.927918027458325,
+ "grad_norm": 22387.8046875,
+ "learning_rate": 1.0492697177130157e-05,
+ "loss": 0.398,
+ "step": 179850
+ },
+ {
+ "epoch": 0.9281759974409378,
+ "grad_norm": 24023.2734375,
+ "learning_rate": 1.0470317716621719e-05,
+ "loss": 0.4026,
+ "step": 179900
+ },
+ {
+ "epoch": 0.9284339674235506,
+ "grad_norm": 24288.666015625,
+ "learning_rate": 1.044795935629223e-05,
+ "loss": 0.403,
+ "step": 179950
+ },
+ {
+ "epoch": 0.9286919374061634,
+ "grad_norm": 26163.923828125,
+ "learning_rate": 1.042562210807616e-05,
+ "loss": 0.4001,
+ "step": 180000
+ },
+ {
+ "epoch": 0.9286919374061634,
+ "eval_loss": 0.3886363208293915,
+ "eval_runtime": 3188.2841,
+ "eval_samples_per_second": 972.661,
+ "eval_steps_per_second": 1.9,
+ "step": 180000
+ },
+ {
+ "epoch": 0.9289499073887763,
+ "grad_norm": 24379.322265625,
+ "learning_rate": 1.0403305983896683e-05,
+ "loss": 0.3978,
+ "step": 180050
+ },
+ {
+ "epoch": 0.9292078773713891,
+ "grad_norm": 23249.939453125,
+ "learning_rate": 1.0381010995665752e-05,
+ "loss": 0.4055,
+ "step": 180100
+ },
+ {
+ "epoch": 0.9294658473540018,
+ "grad_norm": 25460.6875,
+ "learning_rate": 1.0358737155283942e-05,
+ "loss": 0.4059,
+ "step": 180150
+ },
+ {
+ "epoch": 0.9297238173366147,
+ "grad_norm": 23166.548828125,
+ "learning_rate": 1.0336484474640651e-05,
+ "loss": 0.4051,
+ "step": 180200
+ },
+ {
+ "epoch": 0.9299817873192275,
+ "grad_norm": 23631.94921875,
+ "learning_rate": 1.0314252965613908e-05,
+ "loss": 0.3974,
+ "step": 180250
+ },
+ {
+ "epoch": 0.9302397573018404,
+ "grad_norm": 26213.556640625,
+ "learning_rate": 1.0292042640070449e-05,
+ "loss": 0.3983,
+ "step": 180300
+ },
+ {
+ "epoch": 0.9304977272844532,
+ "grad_norm": 24056.875,
+ "learning_rate": 1.0269853509865751e-05,
+ "loss": 0.3979,
+ "step": 180350
+ },
+ {
+ "epoch": 0.930755697267066,
+ "grad_norm": 24793.658203125,
+ "learning_rate": 1.0247685586843897e-05,
+ "loss": 0.3993,
+ "step": 180400
+ },
+ {
+ "epoch": 0.9310136672496788,
+ "grad_norm": 25296.04296875,
+ "learning_rate": 1.0225538882837733e-05,
+ "loss": 0.4047,
+ "step": 180450
+ },
+ {
+ "epoch": 0.9312716372322917,
+ "grad_norm": 21486.990234375,
+ "learning_rate": 1.0203413409668722e-05,
+ "loss": 0.3995,
+ "step": 180500
+ },
+ {
+ "epoch": 0.9315296072149045,
+ "grad_norm": 24168.083984375,
+ "learning_rate": 1.018130917914702e-05,
+ "loss": 0.4081,
+ "step": 180550
+ },
+ {
+ "epoch": 0.9317875771975173,
+ "grad_norm": 25313.568359375,
+ "learning_rate": 1.0159226203071431e-05,
+ "loss": 0.4024,
+ "step": 180600
+ },
+ {
+ "epoch": 0.9320455471801301,
+ "grad_norm": 22535.845703125,
+ "learning_rate": 1.0137164493229411e-05,
+ "loss": 0.3974,
+ "step": 180650
+ },
+ {
+ "epoch": 0.932303517162743,
+ "grad_norm": 24480.0703125,
+ "learning_rate": 1.0115124061397102e-05,
+ "loss": 0.4031,
+ "step": 180700
+ },
+ {
+ "epoch": 0.9325614871453558,
+ "grad_norm": 29667.470703125,
+ "learning_rate": 1.0093104919339241e-05,
+ "loss": 0.3991,
+ "step": 180750
+ },
+ {
+ "epoch": 0.9328194571279685,
+ "grad_norm": 22311.767578125,
+ "learning_rate": 1.0071107078809228e-05,
+ "loss": 0.402,
+ "step": 180800
+ },
+ {
+ "epoch": 0.9330774271105814,
+ "grad_norm": 22752.642578125,
+ "learning_rate": 1.0049130551549068e-05,
+ "loss": 0.4022,
+ "step": 180850
+ },
+ {
+ "epoch": 0.9333353970931942,
+ "grad_norm": 26333.43359375,
+ "learning_rate": 1.0027175349289424e-05,
+ "loss": 0.4006,
+ "step": 180900
+ },
+ {
+ "epoch": 0.9335933670758071,
+ "grad_norm": 22951.927734375,
+ "learning_rate": 1.0005241483749533e-05,
+ "loss": 0.4022,
+ "step": 180950
+ },
+ {
+ "epoch": 0.9338513370584198,
+ "grad_norm": 24532.15625,
+ "learning_rate": 9.983328966637318e-06,
+ "loss": 0.398,
+ "step": 181000
+ },
+ {
+ "epoch": 0.9341093070410327,
+ "grad_norm": 24624.205078125,
+ "learning_rate": 9.961437809649188e-06,
+ "loss": 0.4021,
+ "step": 181050
+ },
+ {
+ "epoch": 0.9343672770236455,
+ "grad_norm": 23679.087890625,
+ "learning_rate": 9.93956802447027e-06,
+ "loss": 0.4038,
+ "step": 181100
+ },
+ {
+ "epoch": 0.9346252470062584,
+ "grad_norm": 22279.52734375,
+ "learning_rate": 9.917719622774219e-06,
+ "loss": 0.3987,
+ "step": 181150
+ },
+ {
+ "epoch": 0.9348832169888712,
+ "grad_norm": 25709.376953125,
+ "learning_rate": 9.895892616223268e-06,
+ "loss": 0.4062,
+ "step": 181200
+ },
+ {
+ "epoch": 0.935141186971484,
+ "grad_norm": 24607.25,
+ "learning_rate": 9.874087016468298e-06,
+ "loss": 0.3973,
+ "step": 181250
+ },
+ {
+ "epoch": 0.9353991569540968,
+ "grad_norm": 25458.861328125,
+ "learning_rate": 9.852302835148652e-06,
+ "loss": 0.3993,
+ "step": 181300
+ },
+ {
+ "epoch": 0.9356571269367097,
+ "grad_norm": 24070.654296875,
+ "learning_rate": 9.830540083892358e-06,
+ "loss": 0.4057,
+ "step": 181350
+ },
+ {
+ "epoch": 0.9359150969193225,
+ "grad_norm": 25323.736328125,
+ "learning_rate": 9.80879877431593e-06,
+ "loss": 0.407,
+ "step": 181400
+ },
+ {
+ "epoch": 0.9361730669019352,
+ "grad_norm": 27513.087890625,
+ "learning_rate": 9.787078918024455e-06,
+ "loss": 0.3979,
+ "step": 181450
+ },
+ {
+ "epoch": 0.9364310368845481,
+ "grad_norm": 22324.669921875,
+ "learning_rate": 9.765380526611568e-06,
+ "loss": 0.3984,
+ "step": 181500
+ },
+ {
+ "epoch": 0.936689006867161,
+ "grad_norm": 23778.37890625,
+ "learning_rate": 9.743703611659465e-06,
+ "loss": 0.4055,
+ "step": 181550
+ },
+ {
+ "epoch": 0.9369469768497738,
+ "grad_norm": 26777.255859375,
+ "learning_rate": 9.722048184738864e-06,
+ "loss": 0.4047,
+ "step": 181600
+ },
+ {
+ "epoch": 0.9372049468323865,
+ "grad_norm": 23210.876953125,
+ "learning_rate": 9.700414257409002e-06,
+ "loss": 0.393,
+ "step": 181650
+ },
+ {
+ "epoch": 0.9374629168149994,
+ "grad_norm": 22539.84765625,
+ "learning_rate": 9.67880184121765e-06,
+ "loss": 0.4069,
+ "step": 181700
+ },
+ {
+ "epoch": 0.9377208867976122,
+ "grad_norm": 25191.609375,
+ "learning_rate": 9.65721094770109e-06,
+ "loss": 0.4069,
+ "step": 181750
+ },
+ {
+ "epoch": 0.9379788567802251,
+ "grad_norm": 23813.578125,
+ "learning_rate": 9.63564158838416e-06,
+ "loss": 0.3954,
+ "step": 181800
+ },
+ {
+ "epoch": 0.9382368267628379,
+ "grad_norm": 23869.703125,
+ "learning_rate": 9.614093774780114e-06,
+ "loss": 0.3998,
+ "step": 181850
+ },
+ {
+ "epoch": 0.9384947967454507,
+ "grad_norm": 23316.384765625,
+ "learning_rate": 9.5925675183908e-06,
+ "loss": 0.3989,
+ "step": 181900
+ },
+ {
+ "epoch": 0.9387527667280635,
+ "grad_norm": 23641.65625,
+ "learning_rate": 9.571062830706496e-06,
+ "loss": 0.4017,
+ "step": 181950
+ },
+ {
+ "epoch": 0.9390107367106764,
+ "grad_norm": 23724.431640625,
+ "learning_rate": 9.549579723205982e-06,
+ "loss": 0.4042,
+ "step": 182000
+ },
+ {
+ "epoch": 0.9392687066932892,
+ "grad_norm": 24013.849609375,
+ "learning_rate": 9.528118207356556e-06,
+ "loss": 0.3966,
+ "step": 182050
+ },
+ {
+ "epoch": 0.939526676675902,
+ "grad_norm": 21843.55859375,
+ "learning_rate": 9.506678294613919e-06,
+ "loss": 0.4051,
+ "step": 182100
+ },
+ {
+ "epoch": 0.9397846466585148,
+ "grad_norm": 22000.7734375,
+ "learning_rate": 9.485259996422313e-06,
+ "loss": 0.4042,
+ "step": 182150
+ },
+ {
+ "epoch": 0.9400426166411276,
+ "grad_norm": 23307.556640625,
+ "learning_rate": 9.463863324214395e-06,
+ "loss": 0.4018,
+ "step": 182200
+ },
+ {
+ "epoch": 0.9403005866237405,
+ "grad_norm": 22961.353515625,
+ "learning_rate": 9.4424882894113e-06,
+ "loss": 0.3991,
+ "step": 182250
+ },
+ {
+ "epoch": 0.9405585566063532,
+ "grad_norm": 24167.134765625,
+ "learning_rate": 9.421134903422607e-06,
+ "loss": 0.4033,
+ "step": 182300
+ },
+ {
+ "epoch": 0.9408165265889661,
+ "grad_norm": 24116.75,
+ "learning_rate": 9.399803177646339e-06,
+ "loss": 0.3979,
+ "step": 182350
+ },
+ {
+ "epoch": 0.9410744965715789,
+ "grad_norm": 25658.6640625,
+ "learning_rate": 9.378493123468946e-06,
+ "loss": 0.4093,
+ "step": 182400
+ },
+ {
+ "epoch": 0.9413324665541918,
+ "grad_norm": 27761.8828125,
+ "learning_rate": 9.357204752265341e-06,
+ "loss": 0.3974,
+ "step": 182450
+ },
+ {
+ "epoch": 0.9415904365368046,
+ "grad_norm": 23456.90234375,
+ "learning_rate": 9.335938075398842e-06,
+ "loss": 0.4072,
+ "step": 182500
+ },
+ {
+ "epoch": 0.9418484065194174,
+ "grad_norm": 21258.984375,
+ "learning_rate": 9.314693104221184e-06,
+ "loss": 0.3952,
+ "step": 182550
+ },
+ {
+ "epoch": 0.9421063765020302,
+ "grad_norm": 22634.01953125,
+ "learning_rate": 9.293469850072522e-06,
+ "loss": 0.402,
+ "step": 182600
+ },
+ {
+ "epoch": 0.9423643464846431,
+ "grad_norm": 22349.267578125,
+ "learning_rate": 9.272268324281407e-06,
+ "loss": 0.3974,
+ "step": 182650
+ },
+ {
+ "epoch": 0.9426223164672559,
+ "grad_norm": 23658.505859375,
+ "learning_rate": 9.251088538164837e-06,
+ "loss": 0.3979,
+ "step": 182700
+ },
+ {
+ "epoch": 0.9428802864498687,
+ "grad_norm": 26879.39453125,
+ "learning_rate": 9.229930503028129e-06,
+ "loss": 0.3965,
+ "step": 182750
+ },
+ {
+ "epoch": 0.9431382564324815,
+ "grad_norm": 25313.255859375,
+ "learning_rate": 9.208794230165058e-06,
+ "loss": 0.4049,
+ "step": 182800
+ },
+ {
+ "epoch": 0.9433962264150944,
+ "grad_norm": 26135.587890625,
+ "learning_rate": 9.187679730857756e-06,
+ "loss": 0.408,
+ "step": 182850
+ },
+ {
+ "epoch": 0.9436541963977072,
+ "grad_norm": 24064.087890625,
+ "learning_rate": 9.166587016376715e-06,
+ "loss": 0.4025,
+ "step": 182900
+ },
+ {
+ "epoch": 0.9439121663803199,
+ "grad_norm": 24475.30859375,
+ "learning_rate": 9.145516097980856e-06,
+ "loss": 0.4019,
+ "step": 182950
+ },
+ {
+ "epoch": 0.9441701363629328,
+ "grad_norm": 23691.06640625,
+ "learning_rate": 9.12446698691738e-06,
+ "loss": 0.4031,
+ "step": 183000
+ },
+ {
+ "epoch": 0.9444281063455456,
+ "grad_norm": 25653.37109375,
+ "learning_rate": 9.103439694421928e-06,
+ "loss": 0.4007,
+ "step": 183050
+ },
+ {
+ "epoch": 0.9446860763281585,
+ "grad_norm": 22718.71875,
+ "learning_rate": 9.08243423171845e-06,
+ "loss": 0.3996,
+ "step": 183100
+ },
+ {
+ "epoch": 0.9449440463107712,
+ "grad_norm": 23337.986328125,
+ "learning_rate": 9.061450610019262e-06,
+ "loss": 0.4043,
+ "step": 183150
+ },
+ {
+ "epoch": 0.9452020162933841,
+ "grad_norm": 27628.021484375,
+ "learning_rate": 9.040488840525001e-06,
+ "loss": 0.409,
+ "step": 183200
+ },
+ {
+ "epoch": 0.9454599862759969,
+ "grad_norm": 22894.26953125,
+ "learning_rate": 9.01954893442467e-06,
+ "loss": 0.4026,
+ "step": 183250
+ },
+ {
+ "epoch": 0.9457179562586098,
+ "grad_norm": 27624.564453125,
+ "learning_rate": 8.998630902895566e-06,
+ "loss": 0.4011,
+ "step": 183300
+ },
+ {
+ "epoch": 0.9459759262412226,
+ "grad_norm": 25944.05859375,
+ "learning_rate": 8.977734757103351e-06,
+ "loss": 0.3995,
+ "step": 183350
+ },
+ {
+ "epoch": 0.9462338962238354,
+ "grad_norm": 27243.31640625,
+ "learning_rate": 8.95686050820197e-06,
+ "loss": 0.3983,
+ "step": 183400
+ },
+ {
+ "epoch": 0.9464918662064482,
+ "grad_norm": 24556.611328125,
+ "learning_rate": 8.936008167333699e-06,
+ "loss": 0.4041,
+ "step": 183450
+ },
+ {
+ "epoch": 0.9467498361890611,
+ "grad_norm": 22205.880859375,
+ "learning_rate": 8.915177745629112e-06,
+ "loss": 0.3973,
+ "step": 183500
+ },
+ {
+ "epoch": 0.9470078061716739,
+ "grad_norm": 26829.6328125,
+ "learning_rate": 8.894369254207069e-06,
+ "loss": 0.4023,
+ "step": 183550
+ },
+ {
+ "epoch": 0.9472657761542866,
+ "grad_norm": 24388.59765625,
+ "learning_rate": 8.873582704174776e-06,
+ "loss": 0.397,
+ "step": 183600
+ },
+ {
+ "epoch": 0.9475237461368995,
+ "grad_norm": 25665.98828125,
+ "learning_rate": 8.852818106627647e-06,
+ "loss": 0.4055,
+ "step": 183650
+ },
+ {
+ "epoch": 0.9477817161195123,
+ "grad_norm": 24880.47265625,
+ "learning_rate": 8.83207547264946e-06,
+ "loss": 0.4016,
+ "step": 183700
+ },
+ {
+ "epoch": 0.9480396861021252,
+ "grad_norm": 26516.6953125,
+ "learning_rate": 8.81135481331221e-06,
+ "loss": 0.3992,
+ "step": 183750
+ },
+ {
+ "epoch": 0.9482976560847379,
+ "grad_norm": 22604.123046875,
+ "learning_rate": 8.790656139676179e-06,
+ "loss": 0.401,
+ "step": 183800
+ },
+ {
+ "epoch": 0.9485556260673508,
+ "grad_norm": 24668.94921875,
+ "learning_rate": 8.769979462789957e-06,
+ "loss": 0.3974,
+ "step": 183850
+ },
+ {
+ "epoch": 0.9488135960499636,
+ "grad_norm": 26522.896484375,
+ "learning_rate": 8.749324793690295e-06,
+ "loss": 0.4048,
+ "step": 183900
+ },
+ {
+ "epoch": 0.9490715660325765,
+ "grad_norm": 26786.48046875,
+ "learning_rate": 8.728692143402295e-06,
+ "loss": 0.4075,
+ "step": 183950
+ },
+ {
+ "epoch": 0.9493295360151893,
+ "grad_norm": 23683.54296875,
+ "learning_rate": 8.708081522939265e-06,
+ "loss": 0.3996,
+ "step": 184000
+ },
+ {
+ "epoch": 0.9495875059978021,
+ "grad_norm": 23064.400390625,
+ "learning_rate": 8.687492943302739e-06,
+ "loss": 0.4036,
+ "step": 184050
+ },
+ {
+ "epoch": 0.9498454759804149,
+ "grad_norm": 24142.4921875,
+ "learning_rate": 8.666926415482501e-06,
+ "loss": 0.4023,
+ "step": 184100
+ },
+ {
+ "epoch": 0.9501034459630278,
+ "grad_norm": 24012.076171875,
+ "learning_rate": 8.6463819504566e-06,
+ "loss": 0.4024,
+ "step": 184150
+ },
+ {
+ "epoch": 0.9503614159456406,
+ "grad_norm": 22214.41015625,
+ "learning_rate": 8.625859559191224e-06,
+ "loss": 0.4002,
+ "step": 184200
+ },
+ {
+ "epoch": 0.9506193859282533,
+ "grad_norm": 24664.162109375,
+ "learning_rate": 8.60535925264086e-06,
+ "loss": 0.4027,
+ "step": 184250
+ },
+ {
+ "epoch": 0.9508773559108662,
+ "grad_norm": 21136.900390625,
+ "learning_rate": 8.584881041748171e-06,
+ "loss": 0.3957,
+ "step": 184300
+ },
+ {
+ "epoch": 0.951135325893479,
+ "grad_norm": 22411.33984375,
+ "learning_rate": 8.56442493744401e-06,
+ "loss": 0.3977,
+ "step": 184350
+ },
+ {
+ "epoch": 0.9513932958760919,
+ "grad_norm": 23004.173828125,
+ "learning_rate": 8.54399095064749e-06,
+ "loss": 0.4014,
+ "step": 184400
+ },
+ {
+ "epoch": 0.9516512658587046,
+ "grad_norm": 23692.26171875,
+ "learning_rate": 8.523579092265827e-06,
+ "loss": 0.4013,
+ "step": 184450
+ },
+ {
+ "epoch": 0.9519092358413175,
+ "grad_norm": 25310.919921875,
+ "learning_rate": 8.503189373194509e-06,
+ "loss": 0.3961,
+ "step": 184500
+ },
+ {
+ "epoch": 0.9521672058239303,
+ "grad_norm": 25963.943359375,
+ "learning_rate": 8.482821804317171e-06,
+ "loss": 0.4049,
+ "step": 184550
+ },
+ {
+ "epoch": 0.9524251758065432,
+ "grad_norm": 24282.115234375,
+ "learning_rate": 8.46247639650562e-06,
+ "loss": 0.4008,
+ "step": 184600
+ },
+ {
+ "epoch": 0.952683145789156,
+ "grad_norm": 24703.26953125,
+ "learning_rate": 8.442153160619837e-06,
+ "loss": 0.4063,
+ "step": 184650
+ },
+ {
+ "epoch": 0.9529411157717688,
+ "grad_norm": 23616.09375,
+ "learning_rate": 8.421852107507966e-06,
+ "loss": 0.3974,
+ "step": 184700
+ },
+ {
+ "epoch": 0.9531990857543816,
+ "grad_norm": 25447.408203125,
+ "learning_rate": 8.40157324800634e-06,
+ "loss": 0.4066,
+ "step": 184750
+ },
+ {
+ "epoch": 0.9534570557369945,
+ "grad_norm": 25534.3984375,
+ "learning_rate": 8.381316592939403e-06,
+ "loss": 0.4027,
+ "step": 184800
+ },
+ {
+ "epoch": 0.9537150257196073,
+ "grad_norm": 24251.138671875,
+ "learning_rate": 8.361082153119777e-06,
+ "loss": 0.3958,
+ "step": 184850
+ },
+ {
+ "epoch": 0.95397299570222,
+ "grad_norm": 26980.046875,
+ "learning_rate": 8.3408699393482e-06,
+ "loss": 0.4058,
+ "step": 184900
+ },
+ {
+ "epoch": 0.9542309656848329,
+ "grad_norm": 26143.732421875,
+ "learning_rate": 8.320679962413574e-06,
+ "loss": 0.4006,
+ "step": 184950
+ },
+ {
+ "epoch": 0.9544889356674457,
+ "grad_norm": 24566.15234375,
+ "learning_rate": 8.300512233092893e-06,
+ "loss": 0.405,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9544889356674457,
+ "eval_loss": 0.3880694806575775,
+ "eval_runtime": 3197.8794,
+ "eval_samples_per_second": 969.743,
+ "eval_steps_per_second": 1.894,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9547469056500586,
+ "grad_norm": 22463.359375,
+ "learning_rate": 8.280366762151349e-06,
+ "loss": 0.4035,
+ "step": 185050
+ },
+ {
+ "epoch": 0.9550048756326713,
+ "grad_norm": 23964.845703125,
+ "learning_rate": 8.260243560342146e-06,
+ "loss": 0.399,
+ "step": 185100
+ },
+ {
+ "epoch": 0.9552628456152842,
+ "grad_norm": 22267.978515625,
+ "learning_rate": 8.2401426384067e-06,
+ "loss": 0.4065,
+ "step": 185150
+ },
+ {
+ "epoch": 0.955520815597897,
+ "grad_norm": 23959.732421875,
+ "learning_rate": 8.220064007074485e-06,
+ "loss": 0.3988,
+ "step": 185200
+ },
+ {
+ "epoch": 0.9557787855805099,
+ "grad_norm": 22042.95703125,
+ "learning_rate": 8.200007677063066e-06,
+ "loss": 0.4005,
+ "step": 185250
+ },
+ {
+ "epoch": 0.9560367555631226,
+ "grad_norm": 23760.798828125,
+ "learning_rate": 8.17997365907816e-06,
+ "loss": 0.4043,
+ "step": 185300
+ },
+ {
+ "epoch": 0.9562947255457355,
+ "grad_norm": 23235.8828125,
+ "learning_rate": 8.1599619638135e-06,
+ "loss": 0.3999,
+ "step": 185350
+ },
+ {
+ "epoch": 0.9565526955283483,
+ "grad_norm": 22637.701171875,
+ "learning_rate": 8.139972601950967e-06,
+ "loss": 0.4004,
+ "step": 185400
+ },
+ {
+ "epoch": 0.9568106655109612,
+ "grad_norm": 28806.810546875,
+ "learning_rate": 8.120005584160489e-06,
+ "loss": 0.4022,
+ "step": 185450
+ },
+ {
+ "epoch": 0.957068635493574,
+ "grad_norm": 22143.8203125,
+ "learning_rate": 8.100060921100067e-06,
+ "loss": 0.3977,
+ "step": 185500
+ },
+ {
+ "epoch": 0.9573266054761868,
+ "grad_norm": 22921.810546875,
+ "learning_rate": 8.080138623415783e-06,
+ "loss": 0.4,
+ "step": 185550
+ },
+ {
+ "epoch": 0.9575845754587996,
+ "grad_norm": 25425.640625,
+ "learning_rate": 8.060238701741762e-06,
+ "loss": 0.4021,
+ "step": 185600
+ },
+ {
+ "epoch": 0.9578425454414125,
+ "grad_norm": 27279.6796875,
+ "learning_rate": 8.040361166700216e-06,
+ "loss": 0.4064,
+ "step": 185650
+ },
+ {
+ "epoch": 0.9581005154240253,
+ "grad_norm": 25144.322265625,
+ "learning_rate": 8.020506028901376e-06,
+ "loss": 0.4031,
+ "step": 185700
+ },
+ {
+ "epoch": 0.958358485406638,
+ "grad_norm": 21046.607421875,
+ "learning_rate": 8.000673298943534e-06,
+ "loss": 0.4041,
+ "step": 185750
+ },
+ {
+ "epoch": 0.9586164553892509,
+ "grad_norm": 23166.087890625,
+ "learning_rate": 7.980862987413018e-06,
+ "loss": 0.3996,
+ "step": 185800
+ },
+ {
+ "epoch": 0.9588744253718637,
+ "grad_norm": 23506.693359375,
+ "learning_rate": 7.961075104884186e-06,
+ "loss": 0.3973,
+ "step": 185850
+ },
+ {
+ "epoch": 0.9591323953544766,
+ "grad_norm": 25975.408203125,
+ "learning_rate": 7.94130966191941e-06,
+ "loss": 0.4048,
+ "step": 185900
+ },
+ {
+ "epoch": 0.9593903653370893,
+ "grad_norm": 23704.638671875,
+ "learning_rate": 7.921566669069147e-06,
+ "loss": 0.4045,
+ "step": 185950
+ },
+ {
+ "epoch": 0.9596483353197022,
+ "grad_norm": 27402.2421875,
+ "learning_rate": 7.901846136871766e-06,
+ "loss": 0.4007,
+ "step": 186000
+ },
+ {
+ "epoch": 0.959906305302315,
+ "grad_norm": 23186.658203125,
+ "learning_rate": 7.882148075853752e-06,
+ "loss": 0.4072,
+ "step": 186050
+ },
+ {
+ "epoch": 0.9601642752849279,
+ "grad_norm": 24789.619140625,
+ "learning_rate": 7.862472496529528e-06,
+ "loss": 0.4056,
+ "step": 186100
+ },
+ {
+ "epoch": 0.9604222452675407,
+ "grad_norm": 23849.71875,
+ "learning_rate": 7.842819409401524e-06,
+ "loss": 0.4067,
+ "step": 186150
+ },
+ {
+ "epoch": 0.9606802152501535,
+ "grad_norm": 24820.765625,
+ "learning_rate": 7.823188824960221e-06,
+ "loss": 0.4071,
+ "step": 186200
+ },
+ {
+ "epoch": 0.9609381852327663,
+ "grad_norm": 23276.568359375,
+ "learning_rate": 7.803580753683992e-06,
+ "loss": 0.3989,
+ "step": 186250
+ },
+ {
+ "epoch": 0.9611961552153792,
+ "grad_norm": 21064.8984375,
+ "learning_rate": 7.783995206039279e-06,
+ "loss": 0.3994,
+ "step": 186300
+ },
+ {
+ "epoch": 0.961454125197992,
+ "grad_norm": 27310.30078125,
+ "learning_rate": 7.764432192480464e-06,
+ "loss": 0.4015,
+ "step": 186350
+ },
+ {
+ "epoch": 0.9617120951806047,
+ "grad_norm": 24786.1796875,
+ "learning_rate": 7.744891723449888e-06,
+ "loss": 0.4042,
+ "step": 186400
+ },
+ {
+ "epoch": 0.9619700651632176,
+ "grad_norm": 22362.47265625,
+ "learning_rate": 7.725373809377911e-06,
+ "loss": 0.3991,
+ "step": 186450
+ },
+ {
+ "epoch": 0.9622280351458304,
+ "grad_norm": 23751.4296875,
+ "learning_rate": 7.705878460682775e-06,
+ "loss": 0.3988,
+ "step": 186500
+ },
+ {
+ "epoch": 0.9624860051284433,
+ "grad_norm": 22956.935546875,
+ "learning_rate": 7.686405687770748e-06,
+ "loss": 0.4049,
+ "step": 186550
+ },
+ {
+ "epoch": 0.962743975111056,
+ "grad_norm": 25276.861328125,
+ "learning_rate": 7.666955501036006e-06,
+ "loss": 0.4005,
+ "step": 186600
+ },
+ {
+ "epoch": 0.9630019450936689,
+ "grad_norm": 22390.625,
+ "learning_rate": 7.647527910860691e-06,
+ "loss": 0.4008,
+ "step": 186650
+ },
+ {
+ "epoch": 0.9632599150762817,
+ "grad_norm": 28946.125,
+ "learning_rate": 7.628122927614856e-06,
+ "loss": 0.3987,
+ "step": 186700
+ },
+ {
+ "epoch": 0.9635178850588946,
+ "grad_norm": 23663.3125,
+ "learning_rate": 7.608740561656541e-06,
+ "loss": 0.4006,
+ "step": 186750
+ },
+ {
+ "epoch": 0.9637758550415074,
+ "grad_norm": 21705.16015625,
+ "learning_rate": 7.589380823331632e-06,
+ "loss": 0.4023,
+ "step": 186800
+ },
+ {
+ "epoch": 0.9640338250241202,
+ "grad_norm": 25353.228515625,
+ "learning_rate": 7.570043722974019e-06,
+ "loss": 0.4006,
+ "step": 186850
+ },
+ {
+ "epoch": 0.964291795006733,
+ "grad_norm": 26046.412109375,
+ "learning_rate": 7.55072927090546e-06,
+ "loss": 0.3931,
+ "step": 186900
+ },
+ {
+ "epoch": 0.9645497649893459,
+ "grad_norm": 25989.2578125,
+ "learning_rate": 7.531437477435621e-06,
+ "loss": 0.3989,
+ "step": 186950
+ },
+ {
+ "epoch": 0.9648077349719587,
+ "grad_norm": 22714.423828125,
+ "learning_rate": 7.51216835286212e-06,
+ "loss": 0.4018,
+ "step": 187000
+ },
+ {
+ "epoch": 0.9650657049545714,
+ "grad_norm": 26353.42578125,
+ "learning_rate": 7.492921907470407e-06,
+ "loss": 0.4056,
+ "step": 187050
+ },
+ {
+ "epoch": 0.9653236749371843,
+ "grad_norm": 23085.212890625,
+ "learning_rate": 7.4736981515338864e-06,
+ "loss": 0.3995,
+ "step": 187100
+ },
+ {
+ "epoch": 0.9655816449197971,
+ "grad_norm": 23125.970703125,
+ "learning_rate": 7.454497095313817e-06,
+ "loss": 0.4069,
+ "step": 187150
+ },
+ {
+ "epoch": 0.96583961490241,
+ "grad_norm": 23488.2265625,
+ "learning_rate": 7.435318749059356e-06,
+ "loss": 0.4039,
+ "step": 187200
+ },
+ {
+ "epoch": 0.9660975848850227,
+ "grad_norm": 22577.46875,
+ "learning_rate": 7.4161631230075305e-06,
+ "loss": 0.4051,
+ "step": 187250
+ },
+ {
+ "epoch": 0.9663555548676356,
+ "grad_norm": 22637.890625,
+ "learning_rate": 7.397030227383228e-06,
+ "loss": 0.3986,
+ "step": 187300
+ },
+ {
+ "epoch": 0.9666135248502484,
+ "grad_norm": 26084.412109375,
+ "learning_rate": 7.377920072399247e-06,
+ "loss": 0.398,
+ "step": 187350
+ },
+ {
+ "epoch": 0.9668714948328613,
+ "grad_norm": 25263.6328125,
+ "learning_rate": 7.3588326682562e-06,
+ "loss": 0.4035,
+ "step": 187400
+ },
+ {
+ "epoch": 0.9671294648154741,
+ "grad_norm": 22348.236328125,
+ "learning_rate": 7.339768025142573e-06,
+ "loss": 0.4003,
+ "step": 187450
+ },
+ {
+ "epoch": 0.9673874347980869,
+ "grad_norm": 23006.091796875,
+ "learning_rate": 7.320726153234714e-06,
+ "loss": 0.399,
+ "step": 187500
+ },
+ {
+ "epoch": 0.9676454047806997,
+ "grad_norm": 24137.44921875,
+ "learning_rate": 7.301707062696794e-06,
+ "loss": 0.3999,
+ "step": 187550
+ },
+ {
+ "epoch": 0.9679033747633126,
+ "grad_norm": 26101.837890625,
+ "learning_rate": 7.282710763680828e-06,
+ "loss": 0.4007,
+ "step": 187600
+ },
+ {
+ "epoch": 0.9681613447459254,
+ "grad_norm": 21417.814453125,
+ "learning_rate": 7.263737266326709e-06,
+ "loss": 0.3994,
+ "step": 187650
+ },
+ {
+ "epoch": 0.9684193147285381,
+ "grad_norm": 25831.45703125,
+ "learning_rate": 7.244786580762075e-06,
+ "loss": 0.3925,
+ "step": 187700
+ },
+ {
+ "epoch": 0.968677284711151,
+ "grad_norm": 24546.84765625,
+ "learning_rate": 7.225858717102474e-06,
+ "loss": 0.4004,
+ "step": 187750
+ },
+ {
+ "epoch": 0.9689352546937638,
+ "grad_norm": 23773.09765625,
+ "learning_rate": 7.206953685451212e-06,
+ "loss": 0.4041,
+ "step": 187800
+ },
+ {
+ "epoch": 0.9691932246763767,
+ "grad_norm": 23538.923828125,
+ "learning_rate": 7.188071495899423e-06,
+ "loss": 0.3971,
+ "step": 187850
+ },
+ {
+ "epoch": 0.9694511946589894,
+ "grad_norm": 24968.310546875,
+ "learning_rate": 7.169212158526084e-06,
+ "loss": 0.4047,
+ "step": 187900
+ },
+ {
+ "epoch": 0.9697091646416023,
+ "grad_norm": 24379.23828125,
+ "learning_rate": 7.150375683397908e-06,
+ "loss": 0.3983,
+ "step": 187950
+ },
+ {
+ "epoch": 0.9699671346242151,
+ "grad_norm": 25501.638671875,
+ "learning_rate": 7.131562080569465e-06,
+ "loss": 0.4024,
+ "step": 188000
+ },
+ {
+ "epoch": 0.970225104606828,
+ "grad_norm": 24917.73046875,
+ "learning_rate": 7.112771360083087e-06,
+ "loss": 0.3998,
+ "step": 188050
+ },
+ {
+ "epoch": 0.9704830745894407,
+ "grad_norm": 24725.638671875,
+ "learning_rate": 7.094003531968896e-06,
+ "loss": 0.3964,
+ "step": 188100
+ },
+ {
+ "epoch": 0.9707410445720536,
+ "grad_norm": 23913.5703125,
+ "learning_rate": 7.075258606244789e-06,
+ "loss": 0.3987,
+ "step": 188150
+ },
+ {
+ "epoch": 0.9709990145546664,
+ "grad_norm": 25010.09375,
+ "learning_rate": 7.05653659291644e-06,
+ "loss": 0.4021,
+ "step": 188200
+ },
+ {
+ "epoch": 0.9712569845372793,
+ "grad_norm": 25357.556640625,
+ "learning_rate": 7.037837501977318e-06,
+ "loss": 0.4007,
+ "step": 188250
+ },
+ {
+ "epoch": 0.9715149545198921,
+ "grad_norm": 24599.890625,
+ "learning_rate": 7.019161343408625e-06,
+ "loss": 0.3962,
+ "step": 188300
+ },
+ {
+ "epoch": 0.9717729245025049,
+ "grad_norm": 25866.2734375,
+ "learning_rate": 7.000508127179328e-06,
+ "loss": 0.3983,
+ "step": 188350
+ },
+ {
+ "epoch": 0.9720308944851177,
+ "grad_norm": 22591.40625,
+ "learning_rate": 6.981877863246161e-06,
+ "loss": 0.3971,
+ "step": 188400
+ },
+ {
+ "epoch": 0.9722888644677306,
+ "grad_norm": 20752.091796875,
+ "learning_rate": 6.963270561553586e-06,
+ "loss": 0.3946,
+ "step": 188450
+ },
+ {
+ "epoch": 0.9725468344503434,
+ "grad_norm": 22927.109375,
+ "learning_rate": 6.94468623203382e-06,
+ "loss": 0.4036,
+ "step": 188500
+ },
+ {
+ "epoch": 0.9728048044329561,
+ "grad_norm": 27096.041015625,
+ "learning_rate": 6.92612488460685e-06,
+ "loss": 0.3982,
+ "step": 188550
+ },
+ {
+ "epoch": 0.973062774415569,
+ "grad_norm": 24426.93359375,
+ "learning_rate": 6.907586529180321e-06,
+ "loss": 0.4054,
+ "step": 188600
+ },
+ {
+ "epoch": 0.9733207443981818,
+ "grad_norm": 25097.658203125,
+ "learning_rate": 6.889071175649669e-06,
+ "loss": 0.4015,
+ "step": 188650
+ },
+ {
+ "epoch": 0.9735787143807947,
+ "grad_norm": 24646.548828125,
+ "learning_rate": 6.870578833898033e-06,
+ "loss": 0.3977,
+ "step": 188700
+ },
+ {
+ "epoch": 0.9738366843634074,
+ "grad_norm": 23465.357421875,
+ "learning_rate": 6.852109513796257e-06,
+ "loss": 0.396,
+ "step": 188750
+ },
+ {
+ "epoch": 0.9740946543460203,
+ "grad_norm": 22382.603515625,
+ "learning_rate": 6.83366322520293e-06,
+ "loss": 0.4018,
+ "step": 188800
+ },
+ {
+ "epoch": 0.9743526243286331,
+ "grad_norm": 24666.61328125,
+ "learning_rate": 6.815239977964283e-06,
+ "loss": 0.4046,
+ "step": 188850
+ },
+ {
+ "epoch": 0.974610594311246,
+ "grad_norm": 25308.685546875,
+ "learning_rate": 6.796839781914321e-06,
+ "loss": 0.3998,
+ "step": 188900
+ },
+ {
+ "epoch": 0.9748685642938588,
+ "grad_norm": 24856.64453125,
+ "learning_rate": 6.778462646874706e-06,
+ "loss": 0.4014,
+ "step": 188950
+ },
+ {
+ "epoch": 0.9751265342764716,
+ "grad_norm": 27452.50390625,
+ "learning_rate": 6.760108582654795e-06,
+ "loss": 0.4008,
+ "step": 189000
+ },
+ {
+ "epoch": 0.9753845042590844,
+ "grad_norm": 25027.416015625,
+ "learning_rate": 6.741777599051629e-06,
+ "loss": 0.4006,
+ "step": 189050
+ },
+ {
+ "epoch": 0.9756424742416973,
+ "grad_norm": 24687.740234375,
+ "learning_rate": 6.723469705849927e-06,
+ "loss": 0.4056,
+ "step": 189100
+ },
+ {
+ "epoch": 0.9759004442243101,
+ "grad_norm": 24812.55078125,
+ "learning_rate": 6.705184912822105e-06,
+ "loss": 0.4043,
+ "step": 189150
+ },
+ {
+ "epoch": 0.9761584142069228,
+ "grad_norm": 25776.005859375,
+ "learning_rate": 6.686923229728214e-06,
+ "loss": 0.4052,
+ "step": 189200
+ },
+ {
+ "epoch": 0.9764163841895357,
+ "grad_norm": 24319.34765625,
+ "learning_rate": 6.668684666316005e-06,
+ "loss": 0.4014,
+ "step": 189250
+ },
+ {
+ "epoch": 0.9766743541721485,
+ "grad_norm": 28024.419921875,
+ "learning_rate": 6.650469232320839e-06,
+ "loss": 0.3991,
+ "step": 189300
+ },
+ {
+ "epoch": 0.9769323241547614,
+ "grad_norm": 25074.068359375,
+ "learning_rate": 6.6322769374658085e-06,
+ "loss": 0.4034,
+ "step": 189350
+ },
+ {
+ "epoch": 0.9771902941373741,
+ "grad_norm": 21126.572265625,
+ "learning_rate": 6.61410779146156e-06,
+ "loss": 0.3998,
+ "step": 189400
+ },
+ {
+ "epoch": 0.977448264119987,
+ "grad_norm": 25041.337890625,
+ "learning_rate": 6.595961804006467e-06,
+ "loss": 0.4012,
+ "step": 189450
+ },
+ {
+ "epoch": 0.9777062341025998,
+ "grad_norm": 25474.263671875,
+ "learning_rate": 6.577838984786489e-06,
+ "loss": 0.3991,
+ "step": 189500
+ },
+ {
+ "epoch": 0.9779642040852127,
+ "grad_norm": 22192.98828125,
+ "learning_rate": 6.55973934347523e-06,
+ "loss": 0.3965,
+ "step": 189550
+ },
+ {
+ "epoch": 0.9782221740678255,
+ "grad_norm": 24587.9453125,
+ "learning_rate": 6.5416628897339625e-06,
+ "loss": 0.4008,
+ "step": 189600
+ },
+ {
+ "epoch": 0.9784801440504383,
+ "grad_norm": 23246.314453125,
+ "learning_rate": 6.523609633211497e-06,
+ "loss": 0.4036,
+ "step": 189650
+ },
+ {
+ "epoch": 0.9787381140330511,
+ "grad_norm": 24233.033203125,
+ "learning_rate": 6.505579583544353e-06,
+ "loss": 0.4002,
+ "step": 189700
+ },
+ {
+ "epoch": 0.978996084015664,
+ "grad_norm": 24149.6953125,
+ "learning_rate": 6.487572750356602e-06,
+ "loss": 0.4043,
+ "step": 189750
+ },
+ {
+ "epoch": 0.9792540539982768,
+ "grad_norm": 25376.3046875,
+ "learning_rate": 6.469589143259952e-06,
+ "loss": 0.3997,
+ "step": 189800
+ },
+ {
+ "epoch": 0.9795120239808895,
+ "grad_norm": 25878.90625,
+ "learning_rate": 6.451628771853696e-06,
+ "loss": 0.3936,
+ "step": 189850
+ },
+ {
+ "epoch": 0.9797699939635024,
+ "grad_norm": 24123.169921875,
+ "learning_rate": 6.433691645724743e-06,
+ "loss": 0.3976,
+ "step": 189900
+ },
+ {
+ "epoch": 0.9800279639461152,
+ "grad_norm": 23894.5625,
+ "learning_rate": 6.4157777744475626e-06,
+ "loss": 0.4025,
+ "step": 189950
+ },
+ {
+ "epoch": 0.9802859339287281,
+ "grad_norm": 27271.9609375,
+ "learning_rate": 6.3978871675842544e-06,
+ "loss": 0.4007,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9802859339287281,
+ "eval_loss": 0.3872862458229065,
+ "eval_runtime": 3184.1416,
+ "eval_samples_per_second": 973.927,
+ "eval_steps_per_second": 1.902,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9805439039113408,
+ "grad_norm": 25592.9296875,
+ "learning_rate": 6.380019834684475e-06,
+ "loss": 0.4041,
+ "step": 190050
+ },
+ {
+ "epoch": 0.9808018738939537,
+ "grad_norm": 22425.51953125,
+ "learning_rate": 6.362175785285457e-06,
+ "loss": 0.4028,
+ "step": 190100
+ },
+ {
+ "epoch": 0.9810598438765665,
+ "grad_norm": 25178.28125,
+ "learning_rate": 6.344355028912008e-06,
+ "loss": 0.3972,
+ "step": 190150
+ },
+ {
+ "epoch": 0.9813178138591794,
+ "grad_norm": 25157.537109375,
+ "learning_rate": 6.326557575076486e-06,
+ "loss": 0.3989,
+ "step": 190200
+ },
+ {
+ "epoch": 0.9815757838417921,
+ "grad_norm": 23774.67578125,
+ "learning_rate": 6.3087834332788695e-06,
+ "loss": 0.4057,
+ "step": 190250
+ },
+ {
+ "epoch": 0.981833753824405,
+ "grad_norm": 25307.736328125,
+ "learning_rate": 6.2910326130066035e-06,
+ "loss": 0.3946,
+ "step": 190300
+ },
+ {
+ "epoch": 0.9820917238070178,
+ "grad_norm": 28657.8125,
+ "learning_rate": 6.273305123734769e-06,
+ "loss": 0.4006,
+ "step": 190350
+ },
+ {
+ "epoch": 0.9823496937896307,
+ "grad_norm": 24404.603515625,
+ "learning_rate": 6.255600974925935e-06,
+ "loss": 0.3998,
+ "step": 190400
+ },
+ {
+ "epoch": 0.9826076637722435,
+ "grad_norm": 22460.1640625,
+ "learning_rate": 6.237920176030232e-06,
+ "loss": 0.4039,
+ "step": 190450
+ },
+ {
+ "epoch": 0.9828656337548562,
+ "grad_norm": 27335.625,
+ "learning_rate": 6.220262736485355e-06,
+ "loss": 0.3937,
+ "step": 190500
+ },
+ {
+ "epoch": 0.9831236037374691,
+ "grad_norm": 27996.9765625,
+ "learning_rate": 6.202628665716464e-06,
+ "loss": 0.4025,
+ "step": 190550
+ },
+ {
+ "epoch": 0.983381573720082,
+ "grad_norm": 23532.66796875,
+ "learning_rate": 6.18501797313632e-06,
+ "loss": 0.4007,
+ "step": 190600
+ },
+ {
+ "epoch": 0.9836395437026948,
+ "grad_norm": 27360.333984375,
+ "learning_rate": 6.167430668145146e-06,
+ "loss": 0.3994,
+ "step": 190650
+ },
+ {
+ "epoch": 0.9838975136853075,
+ "grad_norm": 23754.23828125,
+ "learning_rate": 6.149866760130718e-06,
+ "loss": 0.4043,
+ "step": 190700
+ },
+ {
+ "epoch": 0.9841554836679204,
+ "grad_norm": 24313.943359375,
+ "learning_rate": 6.1323262584683075e-06,
+ "loss": 0.4039,
+ "step": 190750
+ },
+ {
+ "epoch": 0.9844134536505332,
+ "grad_norm": 22932.11328125,
+ "learning_rate": 6.114809172520686e-06,
+ "loss": 0.3977,
+ "step": 190800
+ },
+ {
+ "epoch": 0.9846714236331461,
+ "grad_norm": 27614.103515625,
+ "learning_rate": 6.097315511638135e-06,
+ "loss": 0.405,
+ "step": 190850
+ },
+ {
+ "epoch": 0.9849293936157588,
+ "grad_norm": 21648.470703125,
+ "learning_rate": 6.079845285158447e-06,
+ "loss": 0.403,
+ "step": 190900
+ },
+ {
+ "epoch": 0.9851873635983717,
+ "grad_norm": 25720.76953125,
+ "learning_rate": 6.0623985024068854e-06,
+ "loss": 0.4069,
+ "step": 190950
+ },
+ {
+ "epoch": 0.9854453335809845,
+ "grad_norm": 22051.30078125,
+ "learning_rate": 6.044975172696199e-06,
+ "loss": 0.4062,
+ "step": 191000
+ },
+ {
+ "epoch": 0.9857033035635974,
+ "grad_norm": 27862.138671875,
+ "learning_rate": 6.027575305326621e-06,
+ "loss": 0.4029,
+ "step": 191050
+ },
+ {
+ "epoch": 0.9859612735462102,
+ "grad_norm": 24624.951171875,
+ "learning_rate": 6.010198909585862e-06,
+ "loss": 0.3995,
+ "step": 191100
+ },
+ {
+ "epoch": 0.986219243528823,
+ "grad_norm": 23278.45703125,
+ "learning_rate": 5.992845994749136e-06,
+ "loss": 0.3981,
+ "step": 191150
+ },
+ {
+ "epoch": 0.9864772135114358,
+ "grad_norm": 27549.26953125,
+ "learning_rate": 5.975516570079048e-06,
+ "loss": 0.3999,
+ "step": 191200
+ },
+ {
+ "epoch": 0.9867351834940487,
+ "grad_norm": 24570.40625,
+ "learning_rate": 5.95821064482574e-06,
+ "loss": 0.4052,
+ "step": 191250
+ },
+ {
+ "epoch": 0.9869931534766615,
+ "grad_norm": 23672.029296875,
+ "learning_rate": 5.9409282282267665e-06,
+ "loss": 0.4045,
+ "step": 191300
+ },
+ {
+ "epoch": 0.9872511234592742,
+ "grad_norm": 22627.697265625,
+ "learning_rate": 5.923669329507148e-06,
+ "loss": 0.4017,
+ "step": 191350
+ },
+ {
+ "epoch": 0.9875090934418871,
+ "grad_norm": 22583.0390625,
+ "learning_rate": 5.906433957879365e-06,
+ "loss": 0.399,
+ "step": 191400
+ },
+ {
+ "epoch": 0.9877670634244999,
+ "grad_norm": 22665.984375,
+ "learning_rate": 5.889222122543298e-06,
+ "loss": 0.3989,
+ "step": 191450
+ },
+ {
+ "epoch": 0.9880250334071128,
+ "grad_norm": 25125.6640625,
+ "learning_rate": 5.872033832686319e-06,
+ "loss": 0.4001,
+ "step": 191500
+ },
+ {
+ "epoch": 0.9882830033897255,
+ "grad_norm": 24863.34375,
+ "learning_rate": 5.8548690974831845e-06,
+ "loss": 0.3991,
+ "step": 191550
+ },
+ {
+ "epoch": 0.9885409733723384,
+ "grad_norm": 23538.44921875,
+ "learning_rate": 5.837727926096109e-06,
+ "loss": 0.3979,
+ "step": 191600
+ },
+ {
+ "epoch": 0.9887989433549512,
+ "grad_norm": 23396.3203125,
+ "learning_rate": 5.820610327674708e-06,
+ "loss": 0.4049,
+ "step": 191650
+ },
+ {
+ "epoch": 0.9890569133375641,
+ "grad_norm": 22553.01171875,
+ "learning_rate": 5.803516311356044e-06,
+ "loss": 0.3983,
+ "step": 191700
+ },
+ {
+ "epoch": 0.9893148833201769,
+ "grad_norm": 25163.04296875,
+ "learning_rate": 5.786445886264541e-06,
+ "loss": 0.3969,
+ "step": 191750
+ },
+ {
+ "epoch": 0.9895728533027897,
+ "grad_norm": 22826.181640625,
+ "learning_rate": 5.769399061512093e-06,
+ "loss": 0.4016,
+ "step": 191800
+ },
+ {
+ "epoch": 0.9898308232854025,
+ "grad_norm": 22302.7265625,
+ "learning_rate": 5.752375846197944e-06,
+ "loss": 0.3988,
+ "step": 191850
+ },
+ {
+ "epoch": 0.9900887932680154,
+ "grad_norm": 20985.990234375,
+ "learning_rate": 5.735376249408753e-06,
+ "loss": 0.3952,
+ "step": 191900
+ },
+ {
+ "epoch": 0.9903467632506282,
+ "grad_norm": 23513.19921875,
+ "learning_rate": 5.718400280218611e-06,
+ "loss": 0.4052,
+ "step": 191950
+ },
+ {
+ "epoch": 0.9906047332332409,
+ "grad_norm": 23184.818359375,
+ "learning_rate": 5.7014479476889145e-06,
+ "loss": 0.399,
+ "step": 192000
+ },
+ {
+ "epoch": 0.9908627032158538,
+ "grad_norm": 23472.9453125,
+ "learning_rate": 5.684519260868521e-06,
+ "loss": 0.3946,
+ "step": 192050
+ },
+ {
+ "epoch": 0.9911206731984666,
+ "grad_norm": 26255.388671875,
+ "learning_rate": 5.667614228793622e-06,
+ "loss": 0.3964,
+ "step": 192100
+ },
+ {
+ "epoch": 0.9913786431810795,
+ "grad_norm": 23894.54296875,
+ "learning_rate": 5.650732860487806e-06,
+ "loss": 0.3928,
+ "step": 192150
+ },
+ {
+ "epoch": 0.9916366131636922,
+ "grad_norm": 24135.478515625,
+ "learning_rate": 5.633875164962016e-06,
+ "loss": 0.4019,
+ "step": 192200
+ },
+ {
+ "epoch": 0.9918945831463051,
+ "grad_norm": 26928.08984375,
+ "learning_rate": 5.617041151214553e-06,
+ "loss": 0.3958,
+ "step": 192250
+ },
+ {
+ "epoch": 0.9921525531289179,
+ "grad_norm": 22469.884765625,
+ "learning_rate": 5.600230828231107e-06,
+ "loss": 0.4031,
+ "step": 192300
+ },
+ {
+ "epoch": 0.9924105231115308,
+ "grad_norm": 23694.59765625,
+ "learning_rate": 5.583444204984695e-06,
+ "loss": 0.3926,
+ "step": 192350
+ },
+ {
+ "epoch": 0.9926684930941435,
+ "grad_norm": 23482.986328125,
+ "learning_rate": 5.566681290435688e-06,
+ "loss": 0.4112,
+ "step": 192400
+ },
+ {
+ "epoch": 0.9929264630767564,
+ "grad_norm": 22524.994140625,
+ "learning_rate": 5.549942093531812e-06,
+ "loss": 0.3981,
+ "step": 192450
+ },
+ {
+ "epoch": 0.9931844330593692,
+ "grad_norm": 27258.35546875,
+ "learning_rate": 5.5332266232081155e-06,
+ "loss": 0.4024,
+ "step": 192500
+ },
+ {
+ "epoch": 0.9934424030419821,
+ "grad_norm": 19928.40625,
+ "learning_rate": 5.516534888386992e-06,
+ "loss": 0.4028,
+ "step": 192550
+ },
+ {
+ "epoch": 0.9937003730245949,
+ "grad_norm": 21809.205078125,
+ "learning_rate": 5.499866897978189e-06,
+ "loss": 0.3996,
+ "step": 192600
+ },
+ {
+ "epoch": 0.9939583430072076,
+ "grad_norm": 22132.6171875,
+ "learning_rate": 5.483222660878729e-06,
+ "loss": 0.4012,
+ "step": 192650
+ },
+ {
+ "epoch": 0.9942163129898205,
+ "grad_norm": 25306.728515625,
+ "learning_rate": 5.466602185973002e-06,
+ "loss": 0.3987,
+ "step": 192700
+ },
+ {
+ "epoch": 0.9944742829724333,
+ "grad_norm": 29266.78515625,
+ "learning_rate": 5.4500054821326865e-06,
+ "loss": 0.4028,
+ "step": 192750
+ },
+ {
+ "epoch": 0.9947322529550462,
+ "grad_norm": 23506.931640625,
+ "learning_rate": 5.433432558216778e-06,
+ "loss": 0.3948,
+ "step": 192800
+ },
+ {
+ "epoch": 0.9949902229376589,
+ "grad_norm": 22564.177734375,
+ "learning_rate": 5.416883423071606e-06,
+ "loss": 0.4015,
+ "step": 192850
+ },
+ {
+ "epoch": 0.9952481929202718,
+ "grad_norm": 24564.380859375,
+ "learning_rate": 5.400358085530738e-06,
+ "loss": 0.4046,
+ "step": 192900
+ },
+ {
+ "epoch": 0.9955061629028846,
+ "grad_norm": 24793.91796875,
+ "learning_rate": 5.383856554415117e-06,
+ "loss": 0.4003,
+ "step": 192950
+ },
+ {
+ "epoch": 0.9957641328854975,
+ "grad_norm": 23798.228515625,
+ "learning_rate": 5.367378838532927e-06,
+ "loss": 0.3982,
+ "step": 193000
+ },
+ {
+ "epoch": 0.9960221028681102,
+ "grad_norm": 23164.642578125,
+ "learning_rate": 5.350924946679653e-06,
+ "loss": 0.3977,
+ "step": 193050
+ },
+ {
+ "epoch": 0.9962800728507231,
+ "grad_norm": 25646.29296875,
+ "learning_rate": 5.334494887638058e-06,
+ "loss": 0.3992,
+ "step": 193100
+ },
+ {
+ "epoch": 0.9965380428333359,
+ "grad_norm": 24146.2421875,
+ "learning_rate": 5.318088670178189e-06,
+ "loss": 0.4037,
+ "step": 193150
+ },
+ {
+ "epoch": 0.9967960128159488,
+ "grad_norm": 22594.72265625,
+ "learning_rate": 5.301706303057386e-06,
+ "loss": 0.4004,
+ "step": 193200
+ },
+ {
+ "epoch": 0.9970539827985616,
+ "grad_norm": 23395.515625,
+ "learning_rate": 5.285347795020224e-06,
+ "loss": 0.3958,
+ "step": 193250
+ },
+ {
+ "epoch": 0.9973119527811743,
+ "grad_norm": 23383.431640625,
+ "learning_rate": 5.269013154798558e-06,
+ "loss": 0.3998,
+ "step": 193300
+ },
+ {
+ "epoch": 0.9975699227637872,
+ "grad_norm": 20586.341796875,
+ "learning_rate": 5.252702391111508e-06,
+ "loss": 0.3979,
+ "step": 193350
+ },
+ {
+ "epoch": 0.9978278927464,
+ "grad_norm": 26526.83203125,
+ "learning_rate": 5.236415512665438e-06,
+ "loss": 0.4036,
+ "step": 193400
+ },
+ {
+ "epoch": 0.9980858627290129,
+ "grad_norm": 25045.224609375,
+ "learning_rate": 5.220152528153965e-06,
+ "loss": 0.4028,
+ "step": 193450
+ },
+ {
+ "epoch": 0.9983438327116256,
+ "grad_norm": 23480.755859375,
+ "learning_rate": 5.20391344625798e-06,
+ "loss": 0.4053,
+ "step": 193500
+ },
+ {
+ "epoch": 0.9986018026942385,
+ "grad_norm": 25235.927734375,
+ "learning_rate": 5.187698275645553e-06,
+ "loss": 0.3964,
+ "step": 193550
+ },
+ {
+ "epoch": 0.9988597726768513,
+ "grad_norm": 24883.29296875,
+ "learning_rate": 5.1715070249720555e-06,
+ "loss": 0.3978,
+ "step": 193600
+ },
+ {
+ "epoch": 0.9991177426594642,
+ "grad_norm": 25161.71484375,
+ "learning_rate": 5.155339702880052e-06,
+ "loss": 0.3998,
+ "step": 193650
+ },
+ {
+ "epoch": 0.9993757126420769,
+ "grad_norm": 21524.724609375,
+ "learning_rate": 5.13919631799934e-06,
+ "loss": 0.3955,
+ "step": 193700
+ },
+ {
+ "epoch": 0.9996336826246898,
+ "grad_norm": 23394.1015625,
+ "learning_rate": 5.123076878946981e-06,
+ "loss": 0.3962,
+ "step": 193750
+ },
+ {
+ "epoch": 0.9998916526073026,
+ "grad_norm": 24562.419921875,
+ "learning_rate": 5.106981394327165e-06,
+ "loss": 0.4,
+ "step": 193800
+ },
+ {
+ "epoch": 1.0001496225899154,
+ "grad_norm": 23818.201171875,
+ "learning_rate": 5.090909872731392e-06,
+ "loss": 0.4065,
+ "step": 193850
+ },
+ {
+ "epoch": 1.0004075925725282,
+ "grad_norm": 25973.83984375,
+ "learning_rate": 5.074862322738316e-06,
+ "loss": 0.4015,
+ "step": 193900
+ },
+ {
+ "epoch": 1.000665562555141,
+ "grad_norm": 26476.041015625,
+ "learning_rate": 5.0588387529138085e-06,
+ "loss": 0.401,
+ "step": 193950
+ },
+ {
+ "epoch": 1.000923532537754,
+ "grad_norm": 22776.267578125,
+ "learning_rate": 5.042839171810937e-06,
+ "loss": 0.4021,
+ "step": 194000
+ },
+ {
+ "epoch": 1.0011815025203668,
+ "grad_norm": 22484.884765625,
+ "learning_rate": 5.026863587969966e-06,
+ "loss": 0.4013,
+ "step": 194050
+ },
+ {
+ "epoch": 1.0014394725029796,
+ "grad_norm": 21445.009765625,
+ "learning_rate": 5.010912009918361e-06,
+ "loss": 0.4001,
+ "step": 194100
+ },
+ {
+ "epoch": 1.0016974424855924,
+ "grad_norm": 23748.365234375,
+ "learning_rate": 4.994984446170764e-06,
+ "loss": 0.3985,
+ "step": 194150
+ },
+ {
+ "epoch": 1.0019554124682053,
+ "grad_norm": 25007.73828125,
+ "learning_rate": 4.9790809052289996e-06,
+ "loss": 0.403,
+ "step": 194200
+ },
+ {
+ "epoch": 1.002213382450818,
+ "grad_norm": 26824.900390625,
+ "learning_rate": 4.963201395582062e-06,
+ "loss": 0.3966,
+ "step": 194250
+ },
+ {
+ "epoch": 1.0024713524334308,
+ "grad_norm": 21838.662109375,
+ "learning_rate": 4.947345925706148e-06,
+ "loss": 0.3955,
+ "step": 194300
+ },
+ {
+ "epoch": 1.0027293224160436,
+ "grad_norm": 20830.59375,
+ "learning_rate": 4.931514504064566e-06,
+ "loss": 0.3976,
+ "step": 194350
+ },
+ {
+ "epoch": 1.0029872923986565,
+ "grad_norm": 24187.484375,
+ "learning_rate": 4.915707139107856e-06,
+ "loss": 0.4009,
+ "step": 194400
+ },
+ {
+ "epoch": 1.0032452623812693,
+ "grad_norm": 23026.99609375,
+ "learning_rate": 4.899923839273662e-06,
+ "loss": 0.4017,
+ "step": 194450
+ },
+ {
+ "epoch": 1.0035032323638822,
+ "grad_norm": 25855.919921875,
+ "learning_rate": 4.884164612986808e-06,
+ "loss": 0.3966,
+ "step": 194500
+ },
+ {
+ "epoch": 1.003761202346495,
+ "grad_norm": 23424.58984375,
+ "learning_rate": 4.86842946865928e-06,
+ "loss": 0.4007,
+ "step": 194550
+ },
+ {
+ "epoch": 1.0040191723291079,
+ "grad_norm": 20644.318359375,
+ "learning_rate": 4.852718414690166e-06,
+ "loss": 0.405,
+ "step": 194600
+ },
+ {
+ "epoch": 1.0042771423117207,
+ "grad_norm": 24923.30078125,
+ "learning_rate": 4.8370314594657405e-06,
+ "loss": 0.3961,
+ "step": 194650
+ },
+ {
+ "epoch": 1.0045351122943333,
+ "grad_norm": 23334.19921875,
+ "learning_rate": 4.821368611359395e-06,
+ "loss": 0.3981,
+ "step": 194700
+ },
+ {
+ "epoch": 1.0047930822769462,
+ "grad_norm": 24258.54296875,
+ "learning_rate": 4.8057298787316516e-06,
+ "loss": 0.3998,
+ "step": 194750
+ },
+ {
+ "epoch": 1.005051052259559,
+ "grad_norm": 23366.234375,
+ "learning_rate": 4.790115269930162e-06,
+ "loss": 0.3998,
+ "step": 194800
+ },
+ {
+ "epoch": 1.005309022242172,
+ "grad_norm": 22389.498046875,
+ "learning_rate": 4.774524793289692e-06,
+ "loss": 0.4025,
+ "step": 194850
+ },
+ {
+ "epoch": 1.0055669922247847,
+ "grad_norm": 25497.361328125,
+ "learning_rate": 4.758958457132157e-06,
+ "loss": 0.3979,
+ "step": 194900
+ },
+ {
+ "epoch": 1.0058249622073976,
+ "grad_norm": 24179.626953125,
+ "learning_rate": 4.7434162697665595e-06,
+ "loss": 0.3984,
+ "step": 194950
+ },
+ {
+ "epoch": 1.0060829321900104,
+ "grad_norm": 24002.955078125,
+ "learning_rate": 4.727898239489015e-06,
+ "loss": 0.398,
+ "step": 195000
+ },
+ {
+ "epoch": 1.0060829321900104,
+ "eval_loss": 0.3868441879749298,
+ "eval_runtime": 3205.6792,
+ "eval_samples_per_second": 967.383,
+ "eval_steps_per_second": 1.889,
+ "step": 195000
+ },
+ {
+ "epoch": 1.0063409021726233,
+ "grad_norm": 26567.27734375,
+ "learning_rate": 4.712404374582741e-06,
+ "loss": 0.399,
+ "step": 195050
+ },
+ {
+ "epoch": 1.006598872155236,
+ "grad_norm": 25244.615234375,
+ "learning_rate": 4.696934683318077e-06,
+ "loss": 0.3998,
+ "step": 195100
+ },
+ {
+ "epoch": 1.0068568421378488,
+ "grad_norm": 23278.265625,
+ "learning_rate": 4.6814891739524195e-06,
+ "loss": 0.4002,
+ "step": 195150
+ },
+ {
+ "epoch": 1.0071148121204616,
+ "grad_norm": 23141.138671875,
+ "learning_rate": 4.666067854730322e-06,
+ "loss": 0.3965,
+ "step": 195200
+ },
+ {
+ "epoch": 1.0073727821030745,
+ "grad_norm": 23506.640625,
+ "learning_rate": 4.650670733883344e-06,
+ "loss": 0.3962,
+ "step": 195250
+ },
+ {
+ "epoch": 1.0076307520856873,
+ "grad_norm": 26591.212890625,
+ "learning_rate": 4.635297819630202e-06,
+ "loss": 0.3992,
+ "step": 195300
+ },
+ {
+ "epoch": 1.0078887220683002,
+ "grad_norm": 22111.640625,
+ "learning_rate": 4.619949120176642e-06,
+ "loss": 0.401,
+ "step": 195350
+ },
+ {
+ "epoch": 1.008146692050913,
+ "grad_norm": 25048.17578125,
+ "learning_rate": 4.604624643715505e-06,
+ "loss": 0.4016,
+ "step": 195400
+ },
+ {
+ "epoch": 1.0084046620335259,
+ "grad_norm": 23263.23828125,
+ "learning_rate": 4.589324398426714e-06,
+ "loss": 0.3942,
+ "step": 195450
+ },
+ {
+ "epoch": 1.0086626320161387,
+ "grad_norm": 23640.9296875,
+ "learning_rate": 4.57404839247722e-06,
+ "loss": 0.4039,
+ "step": 195500
+ },
+ {
+ "epoch": 1.0089206019987513,
+ "grad_norm": 25680.390625,
+ "learning_rate": 4.558796634021079e-06,
+ "loss": 0.3986,
+ "step": 195550
+ },
+ {
+ "epoch": 1.0091785719813642,
+ "grad_norm": 23321.78125,
+ "learning_rate": 4.543569131199382e-06,
+ "loss": 0.4039,
+ "step": 195600
+ },
+ {
+ "epoch": 1.009436541963977,
+ "grad_norm": 24123.205078125,
+ "learning_rate": 4.528365892140263e-06,
+ "loss": 0.397,
+ "step": 195650
+ },
+ {
+ "epoch": 1.0096945119465899,
+ "grad_norm": 23332.673828125,
+ "learning_rate": 4.513186924958928e-06,
+ "loss": 0.3941,
+ "step": 195700
+ },
+ {
+ "epoch": 1.0099524819292027,
+ "grad_norm": 25583.609375,
+ "learning_rate": 4.498032237757605e-06,
+ "loss": 0.4046,
+ "step": 195750
+ },
+ {
+ "epoch": 1.0102104519118156,
+ "grad_norm": 25230.3515625,
+ "learning_rate": 4.482901838625586e-06,
+ "loss": 0.4012,
+ "step": 195800
+ },
+ {
+ "epoch": 1.0104684218944284,
+ "grad_norm": 24376.5859375,
+ "learning_rate": 4.46779573563918e-06,
+ "loss": 0.3911,
+ "step": 195850
+ },
+ {
+ "epoch": 1.0107263918770413,
+ "grad_norm": 23978.17578125,
+ "learning_rate": 4.452713936861724e-06,
+ "loss": 0.4031,
+ "step": 195900
+ },
+ {
+ "epoch": 1.010984361859654,
+ "grad_norm": 23535.03515625,
+ "learning_rate": 4.437656450343602e-06,
+ "loss": 0.3933,
+ "step": 195950
+ },
+ {
+ "epoch": 1.0112423318422668,
+ "grad_norm": 24465.794921875,
+ "learning_rate": 4.422623284122207e-06,
+ "loss": 0.4027,
+ "step": 196000
+ },
+ {
+ "epoch": 1.0115003018248796,
+ "grad_norm": 23942.03125,
+ "learning_rate": 4.407614446221936e-06,
+ "loss": 0.4024,
+ "step": 196050
+ },
+ {
+ "epoch": 1.0117582718074924,
+ "grad_norm": 23610.720703125,
+ "learning_rate": 4.392629944654248e-06,
+ "loss": 0.3982,
+ "step": 196100
+ },
+ {
+ "epoch": 1.0120162417901053,
+ "grad_norm": 25937.53125,
+ "learning_rate": 4.3776697874175375e-06,
+ "loss": 0.3991,
+ "step": 196150
+ },
+ {
+ "epoch": 1.0122742117727181,
+ "grad_norm": 24008.5234375,
+ "learning_rate": 4.362733982497286e-06,
+ "loss": 0.3968,
+ "step": 196200
+ },
+ {
+ "epoch": 1.012532181755331,
+ "grad_norm": 23377.744140625,
+ "learning_rate": 4.347822537865914e-06,
+ "loss": 0.3958,
+ "step": 196250
+ },
+ {
+ "epoch": 1.0127901517379438,
+ "grad_norm": 23768.7421875,
+ "learning_rate": 4.332935461482862e-06,
+ "loss": 0.4004,
+ "step": 196300
+ },
+ {
+ "epoch": 1.0130481217205567,
+ "grad_norm": 25974.603515625,
+ "learning_rate": 4.3180727612945896e-06,
+ "loss": 0.4038,
+ "step": 196350
+ },
+ {
+ "epoch": 1.0133060917031693,
+ "grad_norm": 22376.34765625,
+ "learning_rate": 4.303234445234477e-06,
+ "loss": 0.3991,
+ "step": 196400
+ },
+ {
+ "epoch": 1.0135640616857822,
+ "grad_norm": 22145.03515625,
+ "learning_rate": 4.288420521222963e-06,
+ "loss": 0.3971,
+ "step": 196450
+ },
+ {
+ "epoch": 1.013822031668395,
+ "grad_norm": 21512.77734375,
+ "learning_rate": 4.273630997167422e-06,
+ "loss": 0.399,
+ "step": 196500
+ },
+ {
+ "epoch": 1.0140800016510079,
+ "grad_norm": 22957.626953125,
+ "learning_rate": 4.258865880962215e-06,
+ "loss": 0.3995,
+ "step": 196550
+ },
+ {
+ "epoch": 1.0143379716336207,
+ "grad_norm": 21951.89453125,
+ "learning_rate": 4.244125180488673e-06,
+ "loss": 0.3961,
+ "step": 196600
+ },
+ {
+ "epoch": 1.0145959416162336,
+ "grad_norm": 23440.005859375,
+ "learning_rate": 4.229408903615095e-06,
+ "loss": 0.4057,
+ "step": 196650
+ },
+ {
+ "epoch": 1.0148539115988464,
+ "grad_norm": 23987.21484375,
+ "learning_rate": 4.214717058196754e-06,
+ "loss": 0.3999,
+ "step": 196700
+ },
+ {
+ "epoch": 1.0151118815814593,
+ "grad_norm": 24526.482421875,
+ "learning_rate": 4.200049652075866e-06,
+ "loss": 0.3964,
+ "step": 196750
+ },
+ {
+ "epoch": 1.0153698515640721,
+ "grad_norm": 23351.193359375,
+ "learning_rate": 4.185406693081612e-06,
+ "loss": 0.3978,
+ "step": 196800
+ },
+ {
+ "epoch": 1.0156278215466847,
+ "grad_norm": 25014.873046875,
+ "learning_rate": 4.170788189030106e-06,
+ "loss": 0.3963,
+ "step": 196850
+ },
+ {
+ "epoch": 1.0158857915292976,
+ "grad_norm": 21085.181640625,
+ "learning_rate": 4.156194147724451e-06,
+ "loss": 0.4015,
+ "step": 196900
+ },
+ {
+ "epoch": 1.0161437615119104,
+ "grad_norm": 20203.427734375,
+ "learning_rate": 4.141624576954634e-06,
+ "loss": 0.4037,
+ "step": 196950
+ },
+ {
+ "epoch": 1.0164017314945233,
+ "grad_norm": 23869.416015625,
+ "learning_rate": 4.1270794844976255e-06,
+ "loss": 0.4038,
+ "step": 197000
+ },
+ {
+ "epoch": 1.0166597014771361,
+ "grad_norm": 24936.158203125,
+ "learning_rate": 4.112558878117318e-06,
+ "loss": 0.4073,
+ "step": 197050
+ },
+ {
+ "epoch": 1.016917671459749,
+ "grad_norm": 23021.921875,
+ "learning_rate": 4.098062765564509e-06,
+ "loss": 0.4056,
+ "step": 197100
+ },
+ {
+ "epoch": 1.0171756414423618,
+ "grad_norm": 21626.19921875,
+ "learning_rate": 4.083591154576971e-06,
+ "loss": 0.3989,
+ "step": 197150
+ },
+ {
+ "epoch": 1.0174336114249747,
+ "grad_norm": 25556.169921875,
+ "learning_rate": 4.069144052879342e-06,
+ "loss": 0.3975,
+ "step": 197200
+ },
+ {
+ "epoch": 1.0176915814075873,
+ "grad_norm": 23286.365234375,
+ "learning_rate": 4.054721468183226e-06,
+ "loss": 0.3974,
+ "step": 197250
+ },
+ {
+ "epoch": 1.0179495513902002,
+ "grad_norm": 24497.57421875,
+ "learning_rate": 4.040323408187113e-06,
+ "loss": 0.4028,
+ "step": 197300
+ },
+ {
+ "epoch": 1.018207521372813,
+ "grad_norm": 26279.40625,
+ "learning_rate": 4.025949880576407e-06,
+ "loss": 0.4034,
+ "step": 197350
+ },
+ {
+ "epoch": 1.0184654913554259,
+ "grad_norm": 22679.267578125,
+ "learning_rate": 4.011600893023421e-06,
+ "loss": 0.3991,
+ "step": 197400
+ },
+ {
+ "epoch": 1.0187234613380387,
+ "grad_norm": 25421.83984375,
+ "learning_rate": 3.997276453187365e-06,
+ "loss": 0.4023,
+ "step": 197450
+ },
+ {
+ "epoch": 1.0189814313206516,
+ "grad_norm": 25313.75,
+ "learning_rate": 3.982976568714336e-06,
+ "loss": 0.4018,
+ "step": 197500
+ },
+ {
+ "epoch": 1.0192394013032644,
+ "grad_norm": 24318.505859375,
+ "learning_rate": 3.96870124723736e-06,
+ "loss": 0.4027,
+ "step": 197550
+ },
+ {
+ "epoch": 1.0194973712858773,
+ "grad_norm": 22409.70703125,
+ "learning_rate": 3.9544504963763105e-06,
+ "loss": 0.3982,
+ "step": 197600
+ },
+ {
+ "epoch": 1.01975534126849,
+ "grad_norm": 25028.7265625,
+ "learning_rate": 3.9402243237379675e-06,
+ "loss": 0.4037,
+ "step": 197650
+ },
+ {
+ "epoch": 1.0200133112511027,
+ "grad_norm": 21235.19140625,
+ "learning_rate": 3.926022736915985e-06,
+ "loss": 0.3972,
+ "step": 197700
+ },
+ {
+ "epoch": 1.0202712812337156,
+ "grad_norm": 24214.41015625,
+ "learning_rate": 3.911845743490889e-06,
+ "loss": 0.3984,
+ "step": 197750
+ },
+ {
+ "epoch": 1.0205292512163284,
+ "grad_norm": 24445.375,
+ "learning_rate": 3.897693351030102e-06,
+ "loss": 0.4025,
+ "step": 197800
+ },
+ {
+ "epoch": 1.0207872211989413,
+ "grad_norm": 25233.3515625,
+ "learning_rate": 3.883565567087871e-06,
+ "loss": 0.3993,
+ "step": 197850
+ },
+ {
+ "epoch": 1.0210451911815541,
+ "grad_norm": 23982.43359375,
+ "learning_rate": 3.8694623992053534e-06,
+ "loss": 0.4023,
+ "step": 197900
+ },
+ {
+ "epoch": 1.021303161164167,
+ "grad_norm": 28533.689453125,
+ "learning_rate": 3.855383854910549e-06,
+ "loss": 0.3917,
+ "step": 197950
+ },
+ {
+ "epoch": 1.0215611311467798,
+ "grad_norm": 26334.77734375,
+ "learning_rate": 3.841329941718286e-06,
+ "loss": 0.3989,
+ "step": 198000
+ },
+ {
+ "epoch": 1.0218191011293927,
+ "grad_norm": 24765.802734375,
+ "learning_rate": 3.827300667130312e-06,
+ "loss": 0.398,
+ "step": 198050
+ },
+ {
+ "epoch": 1.0220770711120055,
+ "grad_norm": 25089.34765625,
+ "learning_rate": 3.8132960386351445e-06,
+ "loss": 0.4049,
+ "step": 198100
+ },
+ {
+ "epoch": 1.0223350410946181,
+ "grad_norm": 23840.72265625,
+ "learning_rate": 3.7993160637082027e-06,
+ "loss": 0.3998,
+ "step": 198150
+ },
+ {
+ "epoch": 1.022593011077231,
+ "grad_norm": 21590.1328125,
+ "learning_rate": 3.7853607498117282e-06,
+ "loss": 0.404,
+ "step": 198200
+ },
+ {
+ "epoch": 1.0228509810598438,
+ "grad_norm": 24620.478515625,
+ "learning_rate": 3.7714301043947855e-06,
+ "loss": 0.3958,
+ "step": 198250
+ },
+ {
+ "epoch": 1.0231089510424567,
+ "grad_norm": 22476.82421875,
+ "learning_rate": 3.757524134893292e-06,
+ "loss": 0.3993,
+ "step": 198300
+ },
+ {
+ "epoch": 1.0233669210250695,
+ "grad_norm": 22550.45703125,
+ "learning_rate": 3.7436428487299836e-06,
+ "loss": 0.3983,
+ "step": 198350
+ },
+ {
+ "epoch": 1.0236248910076824,
+ "grad_norm": 23764.958984375,
+ "learning_rate": 3.7297862533144045e-06,
+ "loss": 0.4005,
+ "step": 198400
+ },
+ {
+ "epoch": 1.0238828609902952,
+ "grad_norm": 23600.103515625,
+ "learning_rate": 3.7159543560429667e-06,
+ "loss": 0.3976,
+ "step": 198450
+ },
+ {
+ "epoch": 1.024140830972908,
+ "grad_norm": 24258.537109375,
+ "learning_rate": 3.7021471642988583e-06,
+ "loss": 0.4015,
+ "step": 198500
+ },
+ {
+ "epoch": 1.0243988009555207,
+ "grad_norm": 22559.609375,
+ "learning_rate": 3.6883646854520837e-06,
+ "loss": 0.4028,
+ "step": 198550
+ },
+ {
+ "epoch": 1.0246567709381336,
+ "grad_norm": 20827.234375,
+ "learning_rate": 3.67460692685947e-06,
+ "loss": 0.3954,
+ "step": 198600
+ },
+ {
+ "epoch": 1.0249147409207464,
+ "grad_norm": 24864.171875,
+ "learning_rate": 3.6608738958646303e-06,
+ "loss": 0.3919,
+ "step": 198650
+ },
+ {
+ "epoch": 1.0251727109033593,
+ "grad_norm": 25603.6796875,
+ "learning_rate": 3.647165599798019e-06,
+ "loss": 0.3984,
+ "step": 198700
+ },
+ {
+ "epoch": 1.0254306808859721,
+ "grad_norm": 21448.0234375,
+ "learning_rate": 3.6334820459768217e-06,
+ "loss": 0.4031,
+ "step": 198750
+ },
+ {
+ "epoch": 1.025688650868585,
+ "grad_norm": 24923.51953125,
+ "learning_rate": 3.6198232417050782e-06,
+ "loss": 0.4023,
+ "step": 198800
+ },
+ {
+ "epoch": 1.0259466208511978,
+ "grad_norm": 21672.09765625,
+ "learning_rate": 3.6061891942735957e-06,
+ "loss": 0.4027,
+ "step": 198850
+ },
+ {
+ "epoch": 1.0262045908338107,
+ "grad_norm": 24733.31640625,
+ "learning_rate": 3.5925799109599423e-06,
+ "loss": 0.401,
+ "step": 198900
+ },
+ {
+ "epoch": 1.0264625608164235,
+ "grad_norm": 25941.05859375,
+ "learning_rate": 3.5789953990285284e-06,
+ "loss": 0.3944,
+ "step": 198950
+ },
+ {
+ "epoch": 1.0267205307990361,
+ "grad_norm": 25462.96875,
+ "learning_rate": 3.56543566573046e-06,
+ "loss": 0.4021,
+ "step": 199000
+ },
+ {
+ "epoch": 1.026978500781649,
+ "grad_norm": 24243.462890625,
+ "learning_rate": 3.5519007183036856e-06,
+ "loss": 0.4009,
+ "step": 199050
+ },
+ {
+ "epoch": 1.0272364707642618,
+ "grad_norm": 22507.208984375,
+ "learning_rate": 3.5383905639728987e-06,
+ "loss": 0.3968,
+ "step": 199100
+ },
+ {
+ "epoch": 1.0274944407468747,
+ "grad_norm": 22496.060546875,
+ "learning_rate": 3.524905209949553e-06,
+ "loss": 0.3988,
+ "step": 199150
+ },
+ {
+ "epoch": 1.0277524107294875,
+ "grad_norm": 22755.974609375,
+ "learning_rate": 3.511444663431862e-06,
+ "loss": 0.3944,
+ "step": 199200
+ },
+ {
+ "epoch": 1.0280103807121004,
+ "grad_norm": 24945.93359375,
+ "learning_rate": 3.498008931604818e-06,
+ "loss": 0.4015,
+ "step": 199250
+ },
+ {
+ "epoch": 1.0282683506947132,
+ "grad_norm": 23216.15625,
+ "learning_rate": 3.484598021640134e-06,
+ "loss": 0.3982,
+ "step": 199300
+ },
+ {
+ "epoch": 1.028526320677326,
+ "grad_norm": 24690.8203125,
+ "learning_rate": 3.4712119406963174e-06,
+ "loss": 0.4,
+ "step": 199350
+ },
+ {
+ "epoch": 1.0287842906599387,
+ "grad_norm": 23324.27734375,
+ "learning_rate": 3.4578506959185907e-06,
+ "loss": 0.4005,
+ "step": 199400
+ },
+ {
+ "epoch": 1.0290422606425516,
+ "grad_norm": 22831.544921875,
+ "learning_rate": 3.444514294438922e-06,
+ "loss": 0.3987,
+ "step": 199450
+ },
+ {
+ "epoch": 1.0293002306251644,
+ "grad_norm": 22126.681640625,
+ "learning_rate": 3.4312027433760383e-06,
+ "loss": 0.4044,
+ "step": 199500
+ },
+ {
+ "epoch": 1.0295582006077773,
+ "grad_norm": 22105.94140625,
+ "learning_rate": 3.417916049835368e-06,
+ "loss": 0.4023,
+ "step": 199550
+ },
+ {
+ "epoch": 1.02981617059039,
+ "grad_norm": 24164.646484375,
+ "learning_rate": 3.4046542209091037e-06,
+ "loss": 0.3968,
+ "step": 199600
+ },
+ {
+ "epoch": 1.030074140573003,
+ "grad_norm": 23752.33203125,
+ "learning_rate": 3.3914172636761554e-06,
+ "loss": 0.3974,
+ "step": 199650
+ },
+ {
+ "epoch": 1.0303321105556158,
+ "grad_norm": 21793.787109375,
+ "learning_rate": 3.3782051852021433e-06,
+ "loss": 0.3981,
+ "step": 199700
+ },
+ {
+ "epoch": 1.0305900805382286,
+ "grad_norm": 26727.91796875,
+ "learning_rate": 3.365017992539432e-06,
+ "loss": 0.4025,
+ "step": 199750
+ },
+ {
+ "epoch": 1.0308480505208415,
+ "grad_norm": 21089.958984375,
+ "learning_rate": 3.3518556927270683e-06,
+ "loss": 0.4001,
+ "step": 199800
+ },
+ {
+ "epoch": 1.0311060205034541,
+ "grad_norm": 23690.0390625,
+ "learning_rate": 3.33871829279086e-06,
+ "loss": 0.3956,
+ "step": 199850
+ },
+ {
+ "epoch": 1.031363990486067,
+ "grad_norm": 24266.84375,
+ "learning_rate": 3.325605799743281e-06,
+ "loss": 0.3966,
+ "step": 199900
+ },
+ {
+ "epoch": 1.0316219604686798,
+ "grad_norm": 22199.455078125,
+ "learning_rate": 3.312518220583527e-06,
+ "loss": 0.4058,
+ "step": 199950
+ },
+ {
+ "epoch": 1.0318799304512927,
+ "grad_norm": 21272.033203125,
+ "learning_rate": 3.299455562297504e-06,
+ "loss": 0.3969,
+ "step": 200000
+ },
+ {
+ "epoch": 1.0318799304512927,
+ "eval_loss": 0.38684460520744324,
+ "eval_runtime": 3230.0057,
+ "eval_samples_per_second": 960.097,
+ "eval_steps_per_second": 1.875,
+ "step": 200000
+ },
+ {
+ "epoch": 1.0321379004339055,
+ "grad_norm": 23089.7578125,
+ "learning_rate": 3.286417831857791e-06,
+ "loss": 0.4011,
+ "step": 200050
+ },
+ {
+ "epoch": 1.0323958704165184,
+ "grad_norm": 27875.5859375,
+ "learning_rate": 3.2734050362236814e-06,
+ "loss": 0.4014,
+ "step": 200100
+ },
+ {
+ "epoch": 1.0326538403991312,
+ "grad_norm": 22023.40234375,
+ "learning_rate": 3.260417182341169e-06,
+ "loss": 0.398,
+ "step": 200150
+ },
+ {
+ "epoch": 1.032911810381744,
+ "grad_norm": 23899.208984375,
+ "learning_rate": 3.247454277142892e-06,
+ "loss": 0.3976,
+ "step": 200200
+ },
+ {
+ "epoch": 1.0331697803643567,
+ "grad_norm": 22874.44921875,
+ "learning_rate": 3.2345163275482147e-06,
+ "loss": 0.4014,
+ "step": 200250
+ },
+ {
+ "epoch": 1.0334277503469695,
+ "grad_norm": 21650.296875,
+ "learning_rate": 3.221603340463164e-06,
+ "loss": 0.4012,
+ "step": 200300
+ },
+ {
+ "epoch": 1.0336857203295824,
+ "grad_norm": 24189.89453125,
+ "learning_rate": 3.2087153227804314e-06,
+ "loss": 0.401,
+ "step": 200350
+ },
+ {
+ "epoch": 1.0339436903121952,
+ "grad_norm": 21525.12109375,
+ "learning_rate": 3.1958522813794134e-06,
+ "loss": 0.4016,
+ "step": 200400
+ },
+ {
+ "epoch": 1.034201660294808,
+ "grad_norm": 23732.640625,
+ "learning_rate": 3.1830142231261294e-06,
+ "loss": 0.4021,
+ "step": 200450
+ },
+ {
+ "epoch": 1.034459630277421,
+ "grad_norm": 24911.607421875,
+ "learning_rate": 3.170201154873298e-06,
+ "loss": 0.3943,
+ "step": 200500
+ },
+ {
+ "epoch": 1.0347176002600338,
+ "grad_norm": 25295.861328125,
+ "learning_rate": 3.1574130834602813e-06,
+ "loss": 0.401,
+ "step": 200550
+ },
+ {
+ "epoch": 1.0349755702426466,
+ "grad_norm": 23536.498046875,
+ "learning_rate": 3.1446500157131075e-06,
+ "loss": 0.3964,
+ "step": 200600
+ },
+ {
+ "epoch": 1.0352335402252595,
+ "grad_norm": 26484.287109375,
+ "learning_rate": 3.131911958444461e-06,
+ "loss": 0.4068,
+ "step": 200650
+ },
+ {
+ "epoch": 1.0354915102078721,
+ "grad_norm": 24330.001953125,
+ "learning_rate": 3.1191989184536474e-06,
+ "loss": 0.3911,
+ "step": 200700
+ },
+ {
+ "epoch": 1.035749480190485,
+ "grad_norm": 21095.994140625,
+ "learning_rate": 3.1065109025266713e-06,
+ "loss": 0.4,
+ "step": 200750
+ },
+ {
+ "epoch": 1.0360074501730978,
+ "grad_norm": 21829.64453125,
+ "learning_rate": 3.093847917436132e-06,
+ "loss": 0.4016,
+ "step": 200800
+ },
+ {
+ "epoch": 1.0362654201557107,
+ "grad_norm": 25772.79296875,
+ "learning_rate": 3.0812099699412953e-06,
+ "loss": 0.4032,
+ "step": 200850
+ },
+ {
+ "epoch": 1.0365233901383235,
+ "grad_norm": 25614.240234375,
+ "learning_rate": 3.0685970667880425e-06,
+ "loss": 0.3976,
+ "step": 200900
+ },
+ {
+ "epoch": 1.0367813601209364,
+ "grad_norm": 26170.455078125,
+ "learning_rate": 3.056009214708905e-06,
+ "loss": 0.4001,
+ "step": 200950
+ },
+ {
+ "epoch": 1.0370393301035492,
+ "grad_norm": 24801.76171875,
+ "learning_rate": 3.0434464204230186e-06,
+ "loss": 0.3924,
+ "step": 201000
+ },
+ {
+ "epoch": 1.037297300086162,
+ "grad_norm": 28940.640625,
+ "learning_rate": 3.0309086906361917e-06,
+ "loss": 0.3998,
+ "step": 201050
+ },
+ {
+ "epoch": 1.037555270068775,
+ "grad_norm": 23856.90625,
+ "learning_rate": 3.018396032040788e-06,
+ "loss": 0.397,
+ "step": 201100
+ },
+ {
+ "epoch": 1.0378132400513875,
+ "grad_norm": 23309.861328125,
+ "learning_rate": 3.005908451315842e-06,
+ "loss": 0.4026,
+ "step": 201150
+ },
+ {
+ "epoch": 1.0380712100340004,
+ "grad_norm": 23592.7265625,
+ "learning_rate": 2.993445955126978e-06,
+ "loss": 0.3971,
+ "step": 201200
+ },
+ {
+ "epoch": 1.0383291800166132,
+ "grad_norm": 23301.861328125,
+ "learning_rate": 2.9810085501264296e-06,
+ "loss": 0.403,
+ "step": 201250
+ },
+ {
+ "epoch": 1.038587149999226,
+ "grad_norm": 23200.0859375,
+ "learning_rate": 2.968596242953059e-06,
+ "loss": 0.4001,
+ "step": 201300
+ },
+ {
+ "epoch": 1.038845119981839,
+ "grad_norm": 26894.70703125,
+ "learning_rate": 2.956209040232294e-06,
+ "loss": 0.3988,
+ "step": 201350
+ },
+ {
+ "epoch": 1.0391030899644518,
+ "grad_norm": 22423.931640625,
+ "learning_rate": 2.9438469485761956e-06,
+ "loss": 0.3981,
+ "step": 201400
+ },
+ {
+ "epoch": 1.0393610599470646,
+ "grad_norm": 24167.068359375,
+ "learning_rate": 2.9315099745834073e-06,
+ "loss": 0.4024,
+ "step": 201450
+ },
+ {
+ "epoch": 1.0396190299296775,
+ "grad_norm": 25832.712890625,
+ "learning_rate": 2.9191981248391677e-06,
+ "loss": 0.3937,
+ "step": 201500
+ },
+ {
+ "epoch": 1.03987699991229,
+ "grad_norm": 26923.005859375,
+ "learning_rate": 2.9069114059153024e-06,
+ "loss": 0.3922,
+ "step": 201550
+ },
+ {
+ "epoch": 1.040134969894903,
+ "grad_norm": 23295.380859375,
+ "learning_rate": 2.8946498243702158e-06,
+ "loss": 0.4011,
+ "step": 201600
+ },
+ {
+ "epoch": 1.0403929398775158,
+ "grad_norm": 23378.5234375,
+ "learning_rate": 2.882413386748922e-06,
+ "loss": 0.4033,
+ "step": 201650
+ },
+ {
+ "epoch": 1.0406509098601286,
+ "grad_norm": 24349.9140625,
+ "learning_rate": 2.8702020995829803e-06,
+ "loss": 0.3964,
+ "step": 201700
+ },
+ {
+ "epoch": 1.0409088798427415,
+ "grad_norm": 24178.61328125,
+ "learning_rate": 2.8580159693905485e-06,
+ "loss": 0.3978,
+ "step": 201750
+ },
+ {
+ "epoch": 1.0411668498253543,
+ "grad_norm": 24998.189453125,
+ "learning_rate": 2.8458550026763344e-06,
+ "loss": 0.3943,
+ "step": 201800
+ },
+ {
+ "epoch": 1.0414248198079672,
+ "grad_norm": 28928.828125,
+ "learning_rate": 2.8337192059316344e-06,
+ "loss": 0.3998,
+ "step": 201850
+ },
+ {
+ "epoch": 1.04168278979058,
+ "grad_norm": 24329.37890625,
+ "learning_rate": 2.8216085856342946e-06,
+ "loss": 0.3976,
+ "step": 201900
+ },
+ {
+ "epoch": 1.041940759773193,
+ "grad_norm": 24121.482421875,
+ "learning_rate": 2.809523148248744e-06,
+ "loss": 0.3952,
+ "step": 201950
+ },
+ {
+ "epoch": 1.0421987297558055,
+ "grad_norm": 23812.671875,
+ "learning_rate": 2.7974629002259443e-06,
+ "loss": 0.4052,
+ "step": 202000
+ },
+ {
+ "epoch": 1.0424566997384184,
+ "grad_norm": 25162.40234375,
+ "learning_rate": 2.785427848003419e-06,
+ "loss": 0.3948,
+ "step": 202050
+ },
+ {
+ "epoch": 1.0427146697210312,
+ "grad_norm": 23631.462890625,
+ "learning_rate": 2.773417998005262e-06,
+ "loss": 0.3982,
+ "step": 202100
+ },
+ {
+ "epoch": 1.042972639703644,
+ "grad_norm": 24178.177734375,
+ "learning_rate": 2.761433356642079e-06,
+ "loss": 0.4012,
+ "step": 202150
+ },
+ {
+ "epoch": 1.043230609686257,
+ "grad_norm": 24726.37890625,
+ "learning_rate": 2.7494739303110527e-06,
+ "loss": 0.3926,
+ "step": 202200
+ },
+ {
+ "epoch": 1.0434885796688698,
+ "grad_norm": 23798.73828125,
+ "learning_rate": 2.7375397253958935e-06,
+ "loss": 0.3998,
+ "step": 202250
+ },
+ {
+ "epoch": 1.0437465496514826,
+ "grad_norm": 25162.677734375,
+ "learning_rate": 2.725630748266844e-06,
+ "loss": 0.4038,
+ "step": 202300
+ },
+ {
+ "epoch": 1.0440045196340955,
+ "grad_norm": 28668.78515625,
+ "learning_rate": 2.7137470052806814e-06,
+ "loss": 0.3989,
+ "step": 202350
+ },
+ {
+ "epoch": 1.0442624896167083,
+ "grad_norm": 22550.810546875,
+ "learning_rate": 2.7018885027807195e-06,
+ "loss": 0.3994,
+ "step": 202400
+ },
+ {
+ "epoch": 1.044520459599321,
+ "grad_norm": 26758.71484375,
+ "learning_rate": 2.6900552470968064e-06,
+ "loss": 0.4063,
+ "step": 202450
+ },
+ {
+ "epoch": 1.0447784295819338,
+ "grad_norm": 24895.77734375,
+ "learning_rate": 2.678247244545301e-06,
+ "loss": 0.3968,
+ "step": 202500
+ },
+ {
+ "epoch": 1.0450363995645466,
+ "grad_norm": 22442.416015625,
+ "learning_rate": 2.6664645014290833e-06,
+ "loss": 0.4009,
+ "step": 202550
+ },
+ {
+ "epoch": 1.0452943695471595,
+ "grad_norm": 24647.232421875,
+ "learning_rate": 2.654707024037556e-06,
+ "loss": 0.3984,
+ "step": 202600
+ },
+ {
+ "epoch": 1.0455523395297723,
+ "grad_norm": 24156.189453125,
+ "learning_rate": 2.6429748186466265e-06,
+ "loss": 0.3983,
+ "step": 202650
+ },
+ {
+ "epoch": 1.0458103095123852,
+ "grad_norm": 24131.658203125,
+ "learning_rate": 2.6312678915187185e-06,
+ "loss": 0.3941,
+ "step": 202700
+ },
+ {
+ "epoch": 1.046068279494998,
+ "grad_norm": 24890.5625,
+ "learning_rate": 2.6195862489027833e-06,
+ "loss": 0.3936,
+ "step": 202750
+ },
+ {
+ "epoch": 1.0463262494776109,
+ "grad_norm": 26486.58203125,
+ "learning_rate": 2.607929897034228e-06,
+ "loss": 0.4073,
+ "step": 202800
+ },
+ {
+ "epoch": 1.0465842194602235,
+ "grad_norm": 24554.09375,
+ "learning_rate": 2.5962988421350033e-06,
+ "loss": 0.3985,
+ "step": 202850
+ },
+ {
+ "epoch": 1.0468421894428364,
+ "grad_norm": 24964.349609375,
+ "learning_rate": 2.584693090413537e-06,
+ "loss": 0.3974,
+ "step": 202900
+ },
+ {
+ "epoch": 1.0471001594254492,
+ "grad_norm": 21256.87890625,
+ "learning_rate": 2.5731126480647516e-06,
+ "loss": 0.3969,
+ "step": 202950
+ },
+ {
+ "epoch": 1.047358129408062,
+ "grad_norm": 23721.197265625,
+ "learning_rate": 2.5615575212700804e-06,
+ "loss": 0.4039,
+ "step": 203000
+ },
+ {
+ "epoch": 1.047616099390675,
+ "grad_norm": 25096.4609375,
+ "learning_rate": 2.550027716197395e-06,
+ "loss": 0.3953,
+ "step": 203050
+ },
+ {
+ "epoch": 1.0478740693732878,
+ "grad_norm": 22199.11328125,
+ "learning_rate": 2.5385232390011114e-06,
+ "loss": 0.3979,
+ "step": 203100
+ },
+ {
+ "epoch": 1.0481320393559006,
+ "grad_norm": 24967.4609375,
+ "learning_rate": 2.527044095822084e-06,
+ "loss": 0.4023,
+ "step": 203150
+ },
+ {
+ "epoch": 1.0483900093385135,
+ "grad_norm": 28301.302734375,
+ "learning_rate": 2.5155902927876564e-06,
+ "loss": 0.4047,
+ "step": 203200
+ },
+ {
+ "epoch": 1.0486479793211263,
+ "grad_norm": 22268.037109375,
+ "learning_rate": 2.504161836011648e-06,
+ "loss": 0.4032,
+ "step": 203250
+ },
+ {
+ "epoch": 1.048905949303739,
+ "grad_norm": 28254.658203125,
+ "learning_rate": 2.4927587315943414e-06,
+ "loss": 0.3915,
+ "step": 203300
+ },
+ {
+ "epoch": 1.0491639192863518,
+ "grad_norm": 24471.462890625,
+ "learning_rate": 2.4813809856225112e-06,
+ "loss": 0.3986,
+ "step": 203350
+ },
+ {
+ "epoch": 1.0494218892689646,
+ "grad_norm": 24208.7578125,
+ "learning_rate": 2.470028604169361e-06,
+ "loss": 0.3969,
+ "step": 203400
+ },
+ {
+ "epoch": 1.0496798592515775,
+ "grad_norm": 23962.025390625,
+ "learning_rate": 2.4587015932945824e-06,
+ "loss": 0.3992,
+ "step": 203450
+ },
+ {
+ "epoch": 1.0499378292341903,
+ "grad_norm": 24777.421875,
+ "learning_rate": 2.4473999590443054e-06,
+ "loss": 0.4042,
+ "step": 203500
+ },
+ {
+ "epoch": 1.0501957992168032,
+ "grad_norm": 26705.40234375,
+ "learning_rate": 2.4361237074511323e-06,
+ "loss": 0.3985,
+ "step": 203550
+ },
+ {
+ "epoch": 1.050453769199416,
+ "grad_norm": 22508.51171875,
+ "learning_rate": 2.424872844534093e-06,
+ "loss": 0.3967,
+ "step": 203600
+ },
+ {
+ "epoch": 1.0507117391820289,
+ "grad_norm": 24678.62109375,
+ "learning_rate": 2.4136473762987057e-06,
+ "loss": 0.4002,
+ "step": 203650
+ },
+ {
+ "epoch": 1.0509697091646415,
+ "grad_norm": 24190.259765625,
+ "learning_rate": 2.402447308736883e-06,
+ "loss": 0.4002,
+ "step": 203700
+ },
+ {
+ "epoch": 1.0512276791472543,
+ "grad_norm": 27986.912109375,
+ "learning_rate": 2.391272647827014e-06,
+ "loss": 0.406,
+ "step": 203750
+ },
+ {
+ "epoch": 1.0514856491298672,
+ "grad_norm": 23664.740234375,
+ "learning_rate": 2.3801233995339236e-06,
+ "loss": 0.3988,
+ "step": 203800
+ },
+ {
+ "epoch": 1.05174361911248,
+ "grad_norm": 32503.17578125,
+ "learning_rate": 2.368999569808844e-06,
+ "loss": 0.3996,
+ "step": 203850
+ },
+ {
+ "epoch": 1.052001589095093,
+ "grad_norm": 24140.591796875,
+ "learning_rate": 2.3579011645894933e-06,
+ "loss": 0.4021,
+ "step": 203900
+ },
+ {
+ "epoch": 1.0522595590777057,
+ "grad_norm": 24920.033203125,
+ "learning_rate": 2.3468281897999487e-06,
+ "loss": 0.4038,
+ "step": 203950
+ },
+ {
+ "epoch": 1.0525175290603186,
+ "grad_norm": 20836.1796875,
+ "learning_rate": 2.335780651350772e-06,
+ "loss": 0.3929,
+ "step": 204000
+ },
+ {
+ "epoch": 1.0527754990429314,
+ "grad_norm": 22305.021484375,
+ "learning_rate": 2.324758555138923e-06,
+ "loss": 0.3963,
+ "step": 204050
+ },
+ {
+ "epoch": 1.0530334690255443,
+ "grad_norm": 22536.13671875,
+ "learning_rate": 2.3137619070477788e-06,
+ "loss": 0.3923,
+ "step": 204100
+ },
+ {
+ "epoch": 1.053291439008157,
+ "grad_norm": 23319.326171875,
+ "learning_rate": 2.3027907129471395e-06,
+ "loss": 0.4034,
+ "step": 204150
+ },
+ {
+ "epoch": 1.0535494089907698,
+ "grad_norm": 25774.677734375,
+ "learning_rate": 2.2918449786932085e-06,
+ "loss": 0.4015,
+ "step": 204200
+ },
+ {
+ "epoch": 1.0538073789733826,
+ "grad_norm": 23130.119140625,
+ "learning_rate": 2.280924710128618e-06,
+ "loss": 0.3971,
+ "step": 204250
+ },
+ {
+ "epoch": 1.0540653489559955,
+ "grad_norm": 23122.1875,
+ "learning_rate": 2.270029913082394e-06,
+ "loss": 0.3969,
+ "step": 204300
+ },
+ {
+ "epoch": 1.0543233189386083,
+ "grad_norm": 21518.763671875,
+ "learning_rate": 2.2591605933699632e-06,
+ "loss": 0.3992,
+ "step": 204350
+ },
+ {
+ "epoch": 1.0545812889212212,
+ "grad_norm": 25077.322265625,
+ "learning_rate": 2.248316756793156e-06,
+ "loss": 0.405,
+ "step": 204400
+ },
+ {
+ "epoch": 1.054839258903834,
+ "grad_norm": 23907.869140625,
+ "learning_rate": 2.237498409140215e-06,
+ "loss": 0.4009,
+ "step": 204450
+ },
+ {
+ "epoch": 1.0550972288864469,
+ "grad_norm": 22796.865234375,
+ "learning_rate": 2.2267055561857484e-06,
+ "loss": 0.4044,
+ "step": 204500
+ },
+ {
+ "epoch": 1.0553551988690595,
+ "grad_norm": 33471.05859375,
+ "learning_rate": 2.2159382036907927e-06,
+ "loss": 0.4021,
+ "step": 204550
+ },
+ {
+ "epoch": 1.0556131688516723,
+ "grad_norm": 23975.6640625,
+ "learning_rate": 2.2051963574027225e-06,
+ "loss": 0.3922,
+ "step": 204600
+ },
+ {
+ "epoch": 1.0558711388342852,
+ "grad_norm": 24563.220703125,
+ "learning_rate": 2.194480023055351e-06,
+ "loss": 0.3952,
+ "step": 204650
+ },
+ {
+ "epoch": 1.056129108816898,
+ "grad_norm": 24479.20703125,
+ "learning_rate": 2.1837892063688525e-06,
+ "loss": 0.4005,
+ "step": 204700
+ },
+ {
+ "epoch": 1.0563870787995109,
+ "grad_norm": 24895.6640625,
+ "learning_rate": 2.173123913049757e-06,
+ "loss": 0.3985,
+ "step": 204750
+ },
+ {
+ "epoch": 1.0566450487821237,
+ "grad_norm": 25606.34765625,
+ "learning_rate": 2.1624841487910052e-06,
+ "loss": 0.4019,
+ "step": 204800
+ },
+ {
+ "epoch": 1.0569030187647366,
+ "grad_norm": 23026.8828125,
+ "learning_rate": 2.151869919271904e-06,
+ "loss": 0.4023,
+ "step": 204850
+ },
+ {
+ "epoch": 1.0571609887473494,
+ "grad_norm": 24365.9609375,
+ "learning_rate": 2.1412812301581097e-06,
+ "loss": 0.3992,
+ "step": 204900
+ },
+ {
+ "epoch": 1.0574189587299623,
+ "grad_norm": 25374.990234375,
+ "learning_rate": 2.130718087101663e-06,
+ "loss": 0.4009,
+ "step": 204950
+ },
+ {
+ "epoch": 1.057676928712575,
+ "grad_norm": 23697.388671875,
+ "learning_rate": 2.1201804957409697e-06,
+ "loss": 0.4042,
+ "step": 205000
+ },
+ {
+ "epoch": 1.057676928712575,
+ "eval_loss": 0.386392205953598,
+ "eval_runtime": 3213.2768,
+ "eval_samples_per_second": 965.096,
+ "eval_steps_per_second": 1.885,
+ "step": 205000
+ }
+ ],
+ "logging_steps": 50,
+ "max_steps": 225000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 2,
+ "save_steps": 5000,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 5,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 0
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.734735392877381e+17,
+ "train_batch_size": 128,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/training_args.bin b/pretrain_glome_nano_model_tiny/checkpoint-205000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..afd49ab13e1adc210b7ee9755ab768f1bc6434dc
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c41aa9c6023a3a9650c2ca731b440abde601b316b41906bb1dab8748c3c13ed
+size 5304
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-205000/vocab.json b/pretrain_glome_nano_model_tiny/checkpoint-205000/vocab.json
new file mode 100644
index 0000000000000000000000000000000000000000..54045330cccae0d703647b73183868a84aa6c91f
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-205000/vocab.json
@@ -0,0 +1 @@
+{"A":0,"R":1,"N":2,"D":3,"C":4,"Q":5,"E":6,"G":7,"H":8,"I":9,"L":10,"K":11,"M":12,"F":13,"P":14,"S":15,"T":16,"W":17,"Y":18,"V":19,"X":20,"B":21,"U":22,"Z":23,"O":24,".":25,"-":26,"":27,"":28,"":29,"":30,"":31}
\ No newline at end of file
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/config.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b4e7a54fdf8bdda8d2a7ac6356523b75cecb2eb5
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/config.json
@@ -0,0 +1,44 @@
+{
+ "architectures": [
+ "GloMeModelForMaskedLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.0,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "vocab_size": 36
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/merges.txt b/pretrain_glome_nano_model_tiny/checkpoint-210000/merges.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5e7f1fd94996c8e2b65adea828af1b398eace61f
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/merges.txt
@@ -0,0 +1 @@
+#version: 0.2
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/model.safetensors b/pretrain_glome_nano_model_tiny/checkpoint-210000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..95a3b6a9bfde0c4d32af985aedcef72b13300f4d
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de68fc77e7b20dac383ec0e6c5c9f4baaeb2013fe9c78e7b390b0a225406fc89
+size 61429032
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/optimizer.pt b/pretrain_glome_nano_model_tiny/checkpoint-210000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5efd9997b0a989d6541a8a7732455055a7abf972
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:661961eea16b3e466cb1016be7687726369ef24c60264457890d1510ba8ebdd7
+size 122968954
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/rng_state.pth b/pretrain_glome_nano_model_tiny/checkpoint-210000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3808c33d3bf4e4535570f36326852699dbd68afe
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f6c94708df2e1ca974b8d47e998d435a2b275d0a6e954f260928cb8d4f7a245
+size 14244
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/scaler.pt b/pretrain_glome_nano_model_tiny/checkpoint-210000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/scheduler.pt b/pretrain_glome_nano_model_tiny/checkpoint-210000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cef1f7bc109f33247a08372c0cca5bef9791d7c7
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e9fd2bdd7b201e0f82b39359f0fb72a520ab1415a4d65a2db92b0caae70f33a
+size 1064
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/special_tokens_map.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..14f7c9ed7b0bde6d23ee7b6a24ac2996789d1a0b
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/special_tokens_map.json
@@ -0,0 +1,51 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ece9b8e6fa70a006c5c10c47e30c9cff4ff95f0
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer.json
@@ -0,0 +1,123 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 27,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 28,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 29,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 30,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 31,
+ "content": "",
+ "single_word": false,
+ "lstrip": true,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": null,
+ "pre_tokenizer": {
+ "type": "ByteLevel",
+ "add_prefix_space": false,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "post_processor": {
+ "type": "RobertaProcessing",
+ "sep": [
+ "",
+ 29
+ ],
+ "cls": [
+ "",
+ 28
+ ],
+ "trim_offsets": true,
+ "add_prefix_space": false
+ },
+ "decoder": {
+ "type": "ByteLevel",
+ "add_prefix_space": true,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": null,
+ "continuing_subword_prefix": "",
+ "end_of_word_suffix": "",
+ "fuse_unk": false,
+ "byte_fallback": false,
+ "ignore_merges": false,
+ "vocab": {
+ "A": 0,
+ "R": 1,
+ "N": 2,
+ "D": 3,
+ "C": 4,
+ "Q": 5,
+ "E": 6,
+ "G": 7,
+ "H": 8,
+ "I": 9,
+ "L": 10,
+ "K": 11,
+ "M": 12,
+ "F": 13,
+ "P": 14,
+ "S": 15,
+ "T": 16,
+ "W": 17,
+ "Y": 18,
+ "V": 19,
+ "X": 20,
+ "B": 21,
+ "U": 22,
+ "Z": 23,
+ "O": 24,
+ ".": 25,
+ "-": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31
+ },
+ "merges": []
+ }
+}
\ No newline at end of file
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer_config.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c77f0533c6d3bd60b0a23b8adfacc351923d671
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/tokenizer_config.json
@@ -0,0 +1,58 @@
+{
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "27": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "28": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "29": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "30": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "31": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "RobertaTokenizer",
+ "trim_offsets": true,
+ "unk_token": ""
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/trainer_state.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..07787f659992eebbebb199804ee4b405febb0930
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/trainer_state.json
@@ -0,0 +1,29779 @@
+{
+ "best_global_step": null,
+ "best_metric": 0.3863469064235687,
+ "best_model_checkpoint": null,
+ "epoch": 1.0834739269738574,
+ "eval_steps": 5000,
+ "global_step": 210000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0002579699826128232,
+ "grad_norm": 314643.03125,
+ "learning_rate": 4.9e-07,
+ "loss": 3.7018,
+ "step": 50
+ },
+ {
+ "epoch": 0.0005159399652256464,
+ "grad_norm": 286448.65625,
+ "learning_rate": 9.9e-07,
+ "loss": 3.6146,
+ "step": 100
+ },
+ {
+ "epoch": 0.0007739099478384695,
+ "grad_norm": 214582.28125,
+ "learning_rate": 1.4900000000000001e-06,
+ "loss": 3.4562,
+ "step": 150
+ },
+ {
+ "epoch": 0.0010318799304512927,
+ "grad_norm": 137193.9375,
+ "learning_rate": 1.99e-06,
+ "loss": 3.2558,
+ "step": 200
+ },
+ {
+ "epoch": 0.0012898499130641159,
+ "grad_norm": 82222.84375,
+ "learning_rate": 2.49e-06,
+ "loss": 3.0641,
+ "step": 250
+ },
+ {
+ "epoch": 0.001547819895676939,
+ "grad_norm": 56772.109375,
+ "learning_rate": 2.99e-06,
+ "loss": 2.9123,
+ "step": 300
+ },
+ {
+ "epoch": 0.0018057898782897622,
+ "grad_norm": 46737.6796875,
+ "learning_rate": 3.49e-06,
+ "loss": 2.8131,
+ "step": 350
+ },
+ {
+ "epoch": 0.0020637598609025854,
+ "grad_norm": 38769.04296875,
+ "learning_rate": 3.99e-06,
+ "loss": 2.7526,
+ "step": 400
+ },
+ {
+ "epoch": 0.0023217298435154084,
+ "grad_norm": 34701.5546875,
+ "learning_rate": 4.49e-06,
+ "loss": 2.7178,
+ "step": 450
+ },
+ {
+ "epoch": 0.0025796998261282318,
+ "grad_norm": 30332.826171875,
+ "learning_rate": 4.9900000000000005e-06,
+ "loss": 2.6967,
+ "step": 500
+ },
+ {
+ "epoch": 0.0028376698087410547,
+ "grad_norm": 27192.7421875,
+ "learning_rate": 5.49e-06,
+ "loss": 2.6798,
+ "step": 550
+ },
+ {
+ "epoch": 0.003095639791353878,
+ "grad_norm": 36959.33984375,
+ "learning_rate": 5.99e-06,
+ "loss": 2.668,
+ "step": 600
+ },
+ {
+ "epoch": 0.003353609773966701,
+ "grad_norm": 30939.365234375,
+ "learning_rate": 6.4900000000000005e-06,
+ "loss": 2.6572,
+ "step": 650
+ },
+ {
+ "epoch": 0.0036115797565795245,
+ "grad_norm": 26976.78125,
+ "learning_rate": 6.990000000000001e-06,
+ "loss": 2.6397,
+ "step": 700
+ },
+ {
+ "epoch": 0.0038695497391923474,
+ "grad_norm": 32308.447265625,
+ "learning_rate": 7.4899999999999994e-06,
+ "loss": 2.6021,
+ "step": 750
+ },
+ {
+ "epoch": 0.004127519721805171,
+ "grad_norm": 33838.8046875,
+ "learning_rate": 7.99e-06,
+ "loss": 2.5058,
+ "step": 800
+ },
+ {
+ "epoch": 0.004385489704417994,
+ "grad_norm": 49298.84765625,
+ "learning_rate": 8.49e-06,
+ "loss": 2.4095,
+ "step": 850
+ },
+ {
+ "epoch": 0.004643459687030817,
+ "grad_norm": 36241.56640625,
+ "learning_rate": 8.99e-06,
+ "loss": 2.2961,
+ "step": 900
+ },
+ {
+ "epoch": 0.004901429669643641,
+ "grad_norm": 37884.82421875,
+ "learning_rate": 9.49e-06,
+ "loss": 2.1526,
+ "step": 950
+ },
+ {
+ "epoch": 0.0051593996522564635,
+ "grad_norm": 36827.66796875,
+ "learning_rate": 9.990000000000001e-06,
+ "loss": 2.0338,
+ "step": 1000
+ },
+ {
+ "epoch": 0.0054173696348692865,
+ "grad_norm": 41398.421875,
+ "learning_rate": 1.049e-05,
+ "loss": 1.939,
+ "step": 1050
+ },
+ {
+ "epoch": 0.0056753396174821094,
+ "grad_norm": 42648.38671875,
+ "learning_rate": 1.099e-05,
+ "loss": 1.8325,
+ "step": 1100
+ },
+ {
+ "epoch": 0.005933309600094933,
+ "grad_norm": 40172.9453125,
+ "learning_rate": 1.149e-05,
+ "loss": 1.7643,
+ "step": 1150
+ },
+ {
+ "epoch": 0.006191279582707756,
+ "grad_norm": 35860.8515625,
+ "learning_rate": 1.199e-05,
+ "loss": 1.6742,
+ "step": 1200
+ },
+ {
+ "epoch": 0.006449249565320579,
+ "grad_norm": 44456.93359375,
+ "learning_rate": 1.249e-05,
+ "loss": 1.6026,
+ "step": 1250
+ },
+ {
+ "epoch": 0.006707219547933402,
+ "grad_norm": 36839.08984375,
+ "learning_rate": 1.299e-05,
+ "loss": 1.521,
+ "step": 1300
+ },
+ {
+ "epoch": 0.006965189530546226,
+ "grad_norm": 44026.68359375,
+ "learning_rate": 1.349e-05,
+ "loss": 1.4436,
+ "step": 1350
+ },
+ {
+ "epoch": 0.007223159513159049,
+ "grad_norm": 35557.578125,
+ "learning_rate": 1.399e-05,
+ "loss": 1.3773,
+ "step": 1400
+ },
+ {
+ "epoch": 0.007481129495771872,
+ "grad_norm": 38767.60546875,
+ "learning_rate": 1.449e-05,
+ "loss": 1.3023,
+ "step": 1450
+ },
+ {
+ "epoch": 0.007739099478384695,
+ "grad_norm": 36654.796875,
+ "learning_rate": 1.499e-05,
+ "loss": 1.2627,
+ "step": 1500
+ },
+ {
+ "epoch": 0.007997069460997519,
+ "grad_norm": 41690.328125,
+ "learning_rate": 1.5490000000000002e-05,
+ "loss": 1.2063,
+ "step": 1550
+ },
+ {
+ "epoch": 0.008255039443610342,
+ "grad_norm": 38743.59375,
+ "learning_rate": 1.599e-05,
+ "loss": 1.1626,
+ "step": 1600
+ },
+ {
+ "epoch": 0.008513009426223165,
+ "grad_norm": 41839.7890625,
+ "learning_rate": 1.649e-05,
+ "loss": 1.1225,
+ "step": 1650
+ },
+ {
+ "epoch": 0.008770979408835988,
+ "grad_norm": 42897.0703125,
+ "learning_rate": 1.699e-05,
+ "loss": 1.0864,
+ "step": 1700
+ },
+ {
+ "epoch": 0.00902894939144881,
+ "grad_norm": 37412.30859375,
+ "learning_rate": 1.749e-05,
+ "loss": 1.0613,
+ "step": 1750
+ },
+ {
+ "epoch": 0.009286919374061633,
+ "grad_norm": 37235.484375,
+ "learning_rate": 1.7990000000000002e-05,
+ "loss": 1.0354,
+ "step": 1800
+ },
+ {
+ "epoch": 0.009544889356674458,
+ "grad_norm": 39117.6328125,
+ "learning_rate": 1.849e-05,
+ "loss": 1.0059,
+ "step": 1850
+ },
+ {
+ "epoch": 0.009802859339287281,
+ "grad_norm": 37297.6875,
+ "learning_rate": 1.8990000000000003e-05,
+ "loss": 0.9795,
+ "step": 1900
+ },
+ {
+ "epoch": 0.010060829321900104,
+ "grad_norm": 33772.24609375,
+ "learning_rate": 1.949e-05,
+ "loss": 0.9639,
+ "step": 1950
+ },
+ {
+ "epoch": 0.010318799304512927,
+ "grad_norm": 39775.046875,
+ "learning_rate": 1.999e-05,
+ "loss": 0.9386,
+ "step": 2000
+ },
+ {
+ "epoch": 0.01057676928712575,
+ "grad_norm": 38412.2109375,
+ "learning_rate": 2.0490000000000002e-05,
+ "loss": 0.9212,
+ "step": 2050
+ },
+ {
+ "epoch": 0.010834739269738573,
+ "grad_norm": 39548.98046875,
+ "learning_rate": 2.099e-05,
+ "loss": 0.9112,
+ "step": 2100
+ },
+ {
+ "epoch": 0.011092709252351396,
+ "grad_norm": 38127.77734375,
+ "learning_rate": 2.1490000000000003e-05,
+ "loss": 0.8866,
+ "step": 2150
+ },
+ {
+ "epoch": 0.011350679234964219,
+ "grad_norm": 39877.0390625,
+ "learning_rate": 2.199e-05,
+ "loss": 0.8806,
+ "step": 2200
+ },
+ {
+ "epoch": 0.011608649217577044,
+ "grad_norm": 34642.28515625,
+ "learning_rate": 2.249e-05,
+ "loss": 0.8645,
+ "step": 2250
+ },
+ {
+ "epoch": 0.011866619200189867,
+ "grad_norm": 38508.0078125,
+ "learning_rate": 2.2990000000000002e-05,
+ "loss": 0.8609,
+ "step": 2300
+ },
+ {
+ "epoch": 0.01212458918280269,
+ "grad_norm": 33287.765625,
+ "learning_rate": 2.349e-05,
+ "loss": 0.8443,
+ "step": 2350
+ },
+ {
+ "epoch": 0.012382559165415512,
+ "grad_norm": 35477.5546875,
+ "learning_rate": 2.3990000000000002e-05,
+ "loss": 0.839,
+ "step": 2400
+ },
+ {
+ "epoch": 0.012640529148028335,
+ "grad_norm": 32204.408203125,
+ "learning_rate": 2.449e-05,
+ "loss": 0.8204,
+ "step": 2450
+ },
+ {
+ "epoch": 0.012898499130641158,
+ "grad_norm": 35113.59765625,
+ "learning_rate": 2.4990000000000003e-05,
+ "loss": 0.8214,
+ "step": 2500
+ },
+ {
+ "epoch": 0.013156469113253981,
+ "grad_norm": 36591.2421875,
+ "learning_rate": 2.549e-05,
+ "loss": 0.8066,
+ "step": 2550
+ },
+ {
+ "epoch": 0.013414439095866804,
+ "grad_norm": 37926.3125,
+ "learning_rate": 2.5990000000000004e-05,
+ "loss": 0.7993,
+ "step": 2600
+ },
+ {
+ "epoch": 0.013672409078479627,
+ "grad_norm": 35413.01171875,
+ "learning_rate": 2.6490000000000002e-05,
+ "loss": 0.8012,
+ "step": 2650
+ },
+ {
+ "epoch": 0.013930379061092452,
+ "grad_norm": 33275.1796875,
+ "learning_rate": 2.6989999999999997e-05,
+ "loss": 0.7879,
+ "step": 2700
+ },
+ {
+ "epoch": 0.014188349043705275,
+ "grad_norm": 35463.87109375,
+ "learning_rate": 2.749e-05,
+ "loss": 0.7808,
+ "step": 2750
+ },
+ {
+ "epoch": 0.014446319026318098,
+ "grad_norm": 33143.234375,
+ "learning_rate": 2.7989999999999998e-05,
+ "loss": 0.7813,
+ "step": 2800
+ },
+ {
+ "epoch": 0.01470428900893092,
+ "grad_norm": 32908.71484375,
+ "learning_rate": 2.849e-05,
+ "loss": 0.7725,
+ "step": 2850
+ },
+ {
+ "epoch": 0.014962258991543744,
+ "grad_norm": 36443.578125,
+ "learning_rate": 2.8990000000000002e-05,
+ "loss": 0.761,
+ "step": 2900
+ },
+ {
+ "epoch": 0.015220228974156567,
+ "grad_norm": 32331.728515625,
+ "learning_rate": 2.949e-05,
+ "loss": 0.7588,
+ "step": 2950
+ },
+ {
+ "epoch": 0.01547819895676939,
+ "grad_norm": 33401.546875,
+ "learning_rate": 2.9990000000000003e-05,
+ "loss": 0.7462,
+ "step": 3000
+ },
+ {
+ "epoch": 0.015736168939382213,
+ "grad_norm": 32041.26171875,
+ "learning_rate": 3.049e-05,
+ "loss": 0.7449,
+ "step": 3050
+ },
+ {
+ "epoch": 0.015994138921995037,
+ "grad_norm": 32035.814453125,
+ "learning_rate": 3.099e-05,
+ "loss": 0.7373,
+ "step": 3100
+ },
+ {
+ "epoch": 0.01625210890460786,
+ "grad_norm": 31430.421875,
+ "learning_rate": 3.1490000000000005e-05,
+ "loss": 0.7371,
+ "step": 3150
+ },
+ {
+ "epoch": 0.016510078887220683,
+ "grad_norm": 30911.267578125,
+ "learning_rate": 3.1990000000000004e-05,
+ "loss": 0.7315,
+ "step": 3200
+ },
+ {
+ "epoch": 0.016768048869833505,
+ "grad_norm": 31906.193359375,
+ "learning_rate": 3.249e-05,
+ "loss": 0.7405,
+ "step": 3250
+ },
+ {
+ "epoch": 0.01702601885244633,
+ "grad_norm": 30320.1640625,
+ "learning_rate": 3.299e-05,
+ "loss": 0.7323,
+ "step": 3300
+ },
+ {
+ "epoch": 0.017283988835059154,
+ "grad_norm": 32357.072265625,
+ "learning_rate": 3.349e-05,
+ "loss": 0.7244,
+ "step": 3350
+ },
+ {
+ "epoch": 0.017541958817671975,
+ "grad_norm": 34023.2109375,
+ "learning_rate": 3.399e-05,
+ "loss": 0.7214,
+ "step": 3400
+ },
+ {
+ "epoch": 0.0177999288002848,
+ "grad_norm": 33940.8046875,
+ "learning_rate": 3.449e-05,
+ "loss": 0.7158,
+ "step": 3450
+ },
+ {
+ "epoch": 0.01805789878289762,
+ "grad_norm": 31701.14453125,
+ "learning_rate": 3.499e-05,
+ "loss": 0.7102,
+ "step": 3500
+ },
+ {
+ "epoch": 0.018315868765510446,
+ "grad_norm": 32291.861328125,
+ "learning_rate": 3.549e-05,
+ "loss": 0.7104,
+ "step": 3550
+ },
+ {
+ "epoch": 0.018573838748123267,
+ "grad_norm": 28074.177734375,
+ "learning_rate": 3.599e-05,
+ "loss": 0.7001,
+ "step": 3600
+ },
+ {
+ "epoch": 0.01883180873073609,
+ "grad_norm": 29823.787109375,
+ "learning_rate": 3.6490000000000005e-05,
+ "loss": 0.7029,
+ "step": 3650
+ },
+ {
+ "epoch": 0.019089778713348916,
+ "grad_norm": 29792.24609375,
+ "learning_rate": 3.699e-05,
+ "loss": 0.6949,
+ "step": 3700
+ },
+ {
+ "epoch": 0.019347748695961738,
+ "grad_norm": 31345.296875,
+ "learning_rate": 3.749e-05,
+ "loss": 0.6989,
+ "step": 3750
+ },
+ {
+ "epoch": 0.019605718678574562,
+ "grad_norm": 33923.0625,
+ "learning_rate": 3.799e-05,
+ "loss": 0.6984,
+ "step": 3800
+ },
+ {
+ "epoch": 0.019863688661187383,
+ "grad_norm": 30762.97265625,
+ "learning_rate": 3.8490000000000006e-05,
+ "loss": 0.6931,
+ "step": 3850
+ },
+ {
+ "epoch": 0.020121658643800208,
+ "grad_norm": 30794.13671875,
+ "learning_rate": 3.8990000000000004e-05,
+ "loss": 0.6923,
+ "step": 3900
+ },
+ {
+ "epoch": 0.02037962862641303,
+ "grad_norm": 29854.923828125,
+ "learning_rate": 3.9489999999999996e-05,
+ "loss": 0.6895,
+ "step": 3950
+ },
+ {
+ "epoch": 0.020637598609025854,
+ "grad_norm": 27336.958984375,
+ "learning_rate": 3.999e-05,
+ "loss": 0.6853,
+ "step": 4000
+ },
+ {
+ "epoch": 0.020895568591638675,
+ "grad_norm": 31836.81640625,
+ "learning_rate": 4.049e-05,
+ "loss": 0.6821,
+ "step": 4050
+ },
+ {
+ "epoch": 0.0211535385742515,
+ "grad_norm": 28508.548828125,
+ "learning_rate": 4.099e-05,
+ "loss": 0.6857,
+ "step": 4100
+ },
+ {
+ "epoch": 0.021411508556864325,
+ "grad_norm": 30309.2421875,
+ "learning_rate": 4.1490000000000004e-05,
+ "loss": 0.6791,
+ "step": 4150
+ },
+ {
+ "epoch": 0.021669478539477146,
+ "grad_norm": 31035.0703125,
+ "learning_rate": 4.199e-05,
+ "loss": 0.6762,
+ "step": 4200
+ },
+ {
+ "epoch": 0.02192744852208997,
+ "grad_norm": 30893.951171875,
+ "learning_rate": 4.249e-05,
+ "loss": 0.6739,
+ "step": 4250
+ },
+ {
+ "epoch": 0.022185418504702792,
+ "grad_norm": 28317.12890625,
+ "learning_rate": 4.299e-05,
+ "loss": 0.6635,
+ "step": 4300
+ },
+ {
+ "epoch": 0.022443388487315617,
+ "grad_norm": 27140.29296875,
+ "learning_rate": 4.3490000000000005e-05,
+ "loss": 0.6694,
+ "step": 4350
+ },
+ {
+ "epoch": 0.022701358469928438,
+ "grad_norm": 27948.32421875,
+ "learning_rate": 4.3990000000000004e-05,
+ "loss": 0.6667,
+ "step": 4400
+ },
+ {
+ "epoch": 0.022959328452541262,
+ "grad_norm": 27243.44140625,
+ "learning_rate": 4.449e-05,
+ "loss": 0.6689,
+ "step": 4450
+ },
+ {
+ "epoch": 0.023217298435154087,
+ "grad_norm": 29163.98828125,
+ "learning_rate": 4.499e-05,
+ "loss": 0.6639,
+ "step": 4500
+ },
+ {
+ "epoch": 0.02347526841776691,
+ "grad_norm": 27801.79296875,
+ "learning_rate": 4.549000000000001e-05,
+ "loss": 0.6612,
+ "step": 4550
+ },
+ {
+ "epoch": 0.023733238400379733,
+ "grad_norm": 28201.7265625,
+ "learning_rate": 4.599e-05,
+ "loss": 0.6608,
+ "step": 4600
+ },
+ {
+ "epoch": 0.023991208382992554,
+ "grad_norm": 28875.06640625,
+ "learning_rate": 4.649e-05,
+ "loss": 0.6642,
+ "step": 4650
+ },
+ {
+ "epoch": 0.02424917836560538,
+ "grad_norm": 25467.376953125,
+ "learning_rate": 4.699e-05,
+ "loss": 0.6513,
+ "step": 4700
+ },
+ {
+ "epoch": 0.0245071483482182,
+ "grad_norm": 27359.97265625,
+ "learning_rate": 4.749e-05,
+ "loss": 0.6554,
+ "step": 4750
+ },
+ {
+ "epoch": 0.024765118330831025,
+ "grad_norm": 30614.15234375,
+ "learning_rate": 4.799e-05,
+ "loss": 0.6574,
+ "step": 4800
+ },
+ {
+ "epoch": 0.025023088313443846,
+ "grad_norm": 29069.677734375,
+ "learning_rate": 4.8490000000000005e-05,
+ "loss": 0.6562,
+ "step": 4850
+ },
+ {
+ "epoch": 0.02528105829605667,
+ "grad_norm": 27337.37109375,
+ "learning_rate": 4.8990000000000004e-05,
+ "loss": 0.6507,
+ "step": 4900
+ },
+ {
+ "epoch": 0.025539028278669496,
+ "grad_norm": 26784.7265625,
+ "learning_rate": 4.949e-05,
+ "loss": 0.64,
+ "step": 4950
+ },
+ {
+ "epoch": 0.025796998261282317,
+ "grad_norm": 27480.509765625,
+ "learning_rate": 4.999e-05,
+ "loss": 0.6515,
+ "step": 5000
+ },
+ {
+ "epoch": 0.025796998261282317,
+ "eval_loss": 0.6312834024429321,
+ "eval_runtime": 3280.995,
+ "eval_samples_per_second": 945.177,
+ "eval_steps_per_second": 1.846,
+ "step": 5000
+ },
+ {
+ "epoch": 0.02605496824389514,
+ "grad_norm": 27871.740234375,
+ "learning_rate": 5.0490000000000006e-05,
+ "loss": 0.6424,
+ "step": 5050
+ },
+ {
+ "epoch": 0.026312938226507963,
+ "grad_norm": 31187.00390625,
+ "learning_rate": 5.0990000000000005e-05,
+ "loss": 0.643,
+ "step": 5100
+ },
+ {
+ "epoch": 0.026570908209120787,
+ "grad_norm": 25956.521484375,
+ "learning_rate": 5.149e-05,
+ "loss": 0.65,
+ "step": 5150
+ },
+ {
+ "epoch": 0.02682887819173361,
+ "grad_norm": 25967.70703125,
+ "learning_rate": 5.199000000000001e-05,
+ "loss": 0.6466,
+ "step": 5200
+ },
+ {
+ "epoch": 0.027086848174346433,
+ "grad_norm": 25310.275390625,
+ "learning_rate": 5.249000000000001e-05,
+ "loss": 0.6429,
+ "step": 5250
+ },
+ {
+ "epoch": 0.027344818156959255,
+ "grad_norm": 24740.033203125,
+ "learning_rate": 5.2990000000000006e-05,
+ "loss": 0.6415,
+ "step": 5300
+ },
+ {
+ "epoch": 0.02760278813957208,
+ "grad_norm": 30795.58984375,
+ "learning_rate": 5.3490000000000005e-05,
+ "loss": 0.6424,
+ "step": 5350
+ },
+ {
+ "epoch": 0.027860758122184904,
+ "grad_norm": 30625.59375,
+ "learning_rate": 5.399000000000001e-05,
+ "loss": 0.6361,
+ "step": 5400
+ },
+ {
+ "epoch": 0.028118728104797725,
+ "grad_norm": 27036.14453125,
+ "learning_rate": 5.449000000000001e-05,
+ "loss": 0.6351,
+ "step": 5450
+ },
+ {
+ "epoch": 0.02837669808741055,
+ "grad_norm": 26934.447265625,
+ "learning_rate": 5.499000000000001e-05,
+ "loss": 0.6304,
+ "step": 5500
+ },
+ {
+ "epoch": 0.02863466807002337,
+ "grad_norm": 25540.291015625,
+ "learning_rate": 5.549e-05,
+ "loss": 0.6304,
+ "step": 5550
+ },
+ {
+ "epoch": 0.028892638052636196,
+ "grad_norm": 26574.9375,
+ "learning_rate": 5.599e-05,
+ "loss": 0.6444,
+ "step": 5600
+ },
+ {
+ "epoch": 0.029150608035249017,
+ "grad_norm": 26941.955078125,
+ "learning_rate": 5.6489999999999996e-05,
+ "loss": 0.6373,
+ "step": 5650
+ },
+ {
+ "epoch": 0.02940857801786184,
+ "grad_norm": 26957.7734375,
+ "learning_rate": 5.699e-05,
+ "loss": 0.6363,
+ "step": 5700
+ },
+ {
+ "epoch": 0.029666548000474666,
+ "grad_norm": 24377.55859375,
+ "learning_rate": 5.749e-05,
+ "loss": 0.6213,
+ "step": 5750
+ },
+ {
+ "epoch": 0.029924517983087488,
+ "grad_norm": 25600.697265625,
+ "learning_rate": 5.799e-05,
+ "loss": 0.6362,
+ "step": 5800
+ },
+ {
+ "epoch": 0.030182487965700312,
+ "grad_norm": 23841.47265625,
+ "learning_rate": 5.849e-05,
+ "loss": 0.6274,
+ "step": 5850
+ },
+ {
+ "epoch": 0.030440457948313134,
+ "grad_norm": 23847.73046875,
+ "learning_rate": 5.899e-05,
+ "loss": 0.624,
+ "step": 5900
+ },
+ {
+ "epoch": 0.030698427930925958,
+ "grad_norm": 25549.033203125,
+ "learning_rate": 5.949e-05,
+ "loss": 0.627,
+ "step": 5950
+ },
+ {
+ "epoch": 0.03095639791353878,
+ "grad_norm": 25286.8046875,
+ "learning_rate": 5.999e-05,
+ "loss": 0.6272,
+ "step": 6000
+ },
+ {
+ "epoch": 0.031214367896151604,
+ "grad_norm": 25137.384765625,
+ "learning_rate": 6.0490000000000005e-05,
+ "loss": 0.622,
+ "step": 6050
+ },
+ {
+ "epoch": 0.031472337878764425,
+ "grad_norm": 23606.23828125,
+ "learning_rate": 6.0990000000000004e-05,
+ "loss": 0.6262,
+ "step": 6100
+ },
+ {
+ "epoch": 0.031730307861377254,
+ "grad_norm": 32101.404296875,
+ "learning_rate": 6.149000000000001e-05,
+ "loss": 0.619,
+ "step": 6150
+ },
+ {
+ "epoch": 0.031988277843990075,
+ "grad_norm": 23683.73046875,
+ "learning_rate": 6.199000000000001e-05,
+ "loss": 0.6129,
+ "step": 6200
+ },
+ {
+ "epoch": 0.032246247826602896,
+ "grad_norm": 25243.49609375,
+ "learning_rate": 6.249e-05,
+ "loss": 0.6194,
+ "step": 6250
+ },
+ {
+ "epoch": 0.03250421780921572,
+ "grad_norm": 28690.10546875,
+ "learning_rate": 6.299e-05,
+ "loss": 0.6199,
+ "step": 6300
+ },
+ {
+ "epoch": 0.032762187791828545,
+ "grad_norm": 24198.47265625,
+ "learning_rate": 6.349e-05,
+ "loss": 0.6077,
+ "step": 6350
+ },
+ {
+ "epoch": 0.03302015777444137,
+ "grad_norm": 24742.998046875,
+ "learning_rate": 6.399e-05,
+ "loss": 0.6168,
+ "step": 6400
+ },
+ {
+ "epoch": 0.03327812775705419,
+ "grad_norm": 27489.93359375,
+ "learning_rate": 6.449e-05,
+ "loss": 0.6136,
+ "step": 6450
+ },
+ {
+ "epoch": 0.03353609773966701,
+ "grad_norm": 28733.7265625,
+ "learning_rate": 6.499000000000001e-05,
+ "loss": 0.6184,
+ "step": 6500
+ },
+ {
+ "epoch": 0.03379406772227984,
+ "grad_norm": 23810.544921875,
+ "learning_rate": 6.549000000000001e-05,
+ "loss": 0.6167,
+ "step": 6550
+ },
+ {
+ "epoch": 0.03405203770489266,
+ "grad_norm": 25503.98828125,
+ "learning_rate": 6.599000000000001e-05,
+ "loss": 0.6184,
+ "step": 6600
+ },
+ {
+ "epoch": 0.03431000768750548,
+ "grad_norm": 24550.26171875,
+ "learning_rate": 6.649000000000001e-05,
+ "loss": 0.6146,
+ "step": 6650
+ },
+ {
+ "epoch": 0.03456797767011831,
+ "grad_norm": 22774.71875,
+ "learning_rate": 6.699000000000001e-05,
+ "loss": 0.6132,
+ "step": 6700
+ },
+ {
+ "epoch": 0.03482594765273113,
+ "grad_norm": 23878.90625,
+ "learning_rate": 6.749e-05,
+ "loss": 0.6127,
+ "step": 6750
+ },
+ {
+ "epoch": 0.03508391763534395,
+ "grad_norm": 28744.9921875,
+ "learning_rate": 6.799e-05,
+ "loss": 0.6203,
+ "step": 6800
+ },
+ {
+ "epoch": 0.03534188761795677,
+ "grad_norm": 24239.826171875,
+ "learning_rate": 6.849e-05,
+ "loss": 0.6069,
+ "step": 6850
+ },
+ {
+ "epoch": 0.0355998576005696,
+ "grad_norm": 27030.513671875,
+ "learning_rate": 6.899e-05,
+ "loss": 0.614,
+ "step": 6900
+ },
+ {
+ "epoch": 0.03585782758318242,
+ "grad_norm": 22872.59375,
+ "learning_rate": 6.949e-05,
+ "loss": 0.6068,
+ "step": 6950
+ },
+ {
+ "epoch": 0.03611579756579524,
+ "grad_norm": 23280.333984375,
+ "learning_rate": 6.999e-05,
+ "loss": 0.6064,
+ "step": 7000
+ },
+ {
+ "epoch": 0.03637376754840807,
+ "grad_norm": 24819.060546875,
+ "learning_rate": 7.049e-05,
+ "loss": 0.606,
+ "step": 7050
+ },
+ {
+ "epoch": 0.03663173753102089,
+ "grad_norm": 23739.595703125,
+ "learning_rate": 7.099e-05,
+ "loss": 0.6065,
+ "step": 7100
+ },
+ {
+ "epoch": 0.03688970751363371,
+ "grad_norm": 24261.28515625,
+ "learning_rate": 7.149e-05,
+ "loss": 0.6037,
+ "step": 7150
+ },
+ {
+ "epoch": 0.037147677496246534,
+ "grad_norm": 24133.744140625,
+ "learning_rate": 7.199000000000001e-05,
+ "loss": 0.6097,
+ "step": 7200
+ },
+ {
+ "epoch": 0.03740564747885936,
+ "grad_norm": 22903.197265625,
+ "learning_rate": 7.249e-05,
+ "loss": 0.6048,
+ "step": 7250
+ },
+ {
+ "epoch": 0.03766361746147218,
+ "grad_norm": 23503.970703125,
+ "learning_rate": 7.299e-05,
+ "loss": 0.6039,
+ "step": 7300
+ },
+ {
+ "epoch": 0.037921587444085005,
+ "grad_norm": 20935.388671875,
+ "learning_rate": 7.349e-05,
+ "loss": 0.6016,
+ "step": 7350
+ },
+ {
+ "epoch": 0.03817955742669783,
+ "grad_norm": 22991.720703125,
+ "learning_rate": 7.399e-05,
+ "loss": 0.6111,
+ "step": 7400
+ },
+ {
+ "epoch": 0.038437527409310654,
+ "grad_norm": 21915.90234375,
+ "learning_rate": 7.449e-05,
+ "loss": 0.5969,
+ "step": 7450
+ },
+ {
+ "epoch": 0.038695497391923475,
+ "grad_norm": 22474.25390625,
+ "learning_rate": 7.499e-05,
+ "loss": 0.6068,
+ "step": 7500
+ },
+ {
+ "epoch": 0.038953467374536296,
+ "grad_norm": 24122.150390625,
+ "learning_rate": 7.549000000000001e-05,
+ "loss": 0.6037,
+ "step": 7550
+ },
+ {
+ "epoch": 0.039211437357149125,
+ "grad_norm": 22262.220703125,
+ "learning_rate": 7.599000000000001e-05,
+ "loss": 0.5946,
+ "step": 7600
+ },
+ {
+ "epoch": 0.039469407339761946,
+ "grad_norm": 23959.7265625,
+ "learning_rate": 7.649000000000001e-05,
+ "loss": 0.598,
+ "step": 7650
+ },
+ {
+ "epoch": 0.03972737732237477,
+ "grad_norm": 21918.5859375,
+ "learning_rate": 7.699e-05,
+ "loss": 0.5959,
+ "step": 7700
+ },
+ {
+ "epoch": 0.03998534730498759,
+ "grad_norm": 23740.5390625,
+ "learning_rate": 7.749e-05,
+ "loss": 0.594,
+ "step": 7750
+ },
+ {
+ "epoch": 0.040243317287600416,
+ "grad_norm": 23406.4296875,
+ "learning_rate": 7.799e-05,
+ "loss": 0.6048,
+ "step": 7800
+ },
+ {
+ "epoch": 0.04050128727021324,
+ "grad_norm": 23423.201171875,
+ "learning_rate": 7.849e-05,
+ "loss": 0.5944,
+ "step": 7850
+ },
+ {
+ "epoch": 0.04075925725282606,
+ "grad_norm": 23187.76171875,
+ "learning_rate": 7.899000000000001e-05,
+ "loss": 0.5944,
+ "step": 7900
+ },
+ {
+ "epoch": 0.04101722723543889,
+ "grad_norm": 25532.4375,
+ "learning_rate": 7.949000000000001e-05,
+ "loss": 0.5978,
+ "step": 7950
+ },
+ {
+ "epoch": 0.04127519721805171,
+ "grad_norm": 23045.28515625,
+ "learning_rate": 7.999000000000001e-05,
+ "loss": 0.5968,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04153316720066453,
+ "grad_norm": 22853.826171875,
+ "learning_rate": 8.049e-05,
+ "loss": 0.5915,
+ "step": 8050
+ },
+ {
+ "epoch": 0.04179113718327735,
+ "grad_norm": 21853.658203125,
+ "learning_rate": 8.099e-05,
+ "loss": 0.5932,
+ "step": 8100
+ },
+ {
+ "epoch": 0.04204910716589018,
+ "grad_norm": 22395.74609375,
+ "learning_rate": 8.149e-05,
+ "loss": 0.5925,
+ "step": 8150
+ },
+ {
+ "epoch": 0.042307077148503,
+ "grad_norm": 23933.40625,
+ "learning_rate": 8.199e-05,
+ "loss": 0.5878,
+ "step": 8200
+ },
+ {
+ "epoch": 0.04256504713111582,
+ "grad_norm": 21773.087890625,
+ "learning_rate": 8.249e-05,
+ "loss": 0.5916,
+ "step": 8250
+ },
+ {
+ "epoch": 0.04282301711372865,
+ "grad_norm": 22665.11328125,
+ "learning_rate": 8.299e-05,
+ "loss": 0.5906,
+ "step": 8300
+ },
+ {
+ "epoch": 0.04308098709634147,
+ "grad_norm": 22157.091796875,
+ "learning_rate": 8.349e-05,
+ "loss": 0.5873,
+ "step": 8350
+ },
+ {
+ "epoch": 0.04333895707895429,
+ "grad_norm": 21506.8125,
+ "learning_rate": 8.399e-05,
+ "loss": 0.5927,
+ "step": 8400
+ },
+ {
+ "epoch": 0.04359692706156711,
+ "grad_norm": 22143.341796875,
+ "learning_rate": 8.449e-05,
+ "loss": 0.5828,
+ "step": 8450
+ },
+ {
+ "epoch": 0.04385489704417994,
+ "grad_norm": 23341.23828125,
+ "learning_rate": 8.499e-05,
+ "loss": 0.5885,
+ "step": 8500
+ },
+ {
+ "epoch": 0.04411286702679276,
+ "grad_norm": 21876.96484375,
+ "learning_rate": 8.549000000000001e-05,
+ "loss": 0.5913,
+ "step": 8550
+ },
+ {
+ "epoch": 0.044370837009405584,
+ "grad_norm": 22307.29296875,
+ "learning_rate": 8.599000000000001e-05,
+ "loss": 0.583,
+ "step": 8600
+ },
+ {
+ "epoch": 0.04462880699201841,
+ "grad_norm": 22859.017578125,
+ "learning_rate": 8.649000000000001e-05,
+ "loss": 0.5889,
+ "step": 8650
+ },
+ {
+ "epoch": 0.04488677697463123,
+ "grad_norm": 22058.24609375,
+ "learning_rate": 8.699e-05,
+ "loss": 0.5848,
+ "step": 8700
+ },
+ {
+ "epoch": 0.045144746957244054,
+ "grad_norm": 22116.837890625,
+ "learning_rate": 8.749e-05,
+ "loss": 0.5858,
+ "step": 8750
+ },
+ {
+ "epoch": 0.045402716939856876,
+ "grad_norm": 23110.17578125,
+ "learning_rate": 8.799e-05,
+ "loss": 0.5855,
+ "step": 8800
+ },
+ {
+ "epoch": 0.045660686922469704,
+ "grad_norm": 24173.064453125,
+ "learning_rate": 8.849e-05,
+ "loss": 0.5878,
+ "step": 8850
+ },
+ {
+ "epoch": 0.045918656905082525,
+ "grad_norm": 21521.48046875,
+ "learning_rate": 8.899e-05,
+ "loss": 0.5914,
+ "step": 8900
+ },
+ {
+ "epoch": 0.046176626887695346,
+ "grad_norm": 24516.0,
+ "learning_rate": 8.949000000000001e-05,
+ "loss": 0.5849,
+ "step": 8950
+ },
+ {
+ "epoch": 0.046434596870308174,
+ "grad_norm": 22074.9609375,
+ "learning_rate": 8.999000000000001e-05,
+ "loss": 0.5848,
+ "step": 9000
+ },
+ {
+ "epoch": 0.046692566852920996,
+ "grad_norm": 21495.4140625,
+ "learning_rate": 9.049000000000001e-05,
+ "loss": 0.579,
+ "step": 9050
+ },
+ {
+ "epoch": 0.04695053683553382,
+ "grad_norm": 23548.224609375,
+ "learning_rate": 9.099000000000001e-05,
+ "loss": 0.5826,
+ "step": 9100
+ },
+ {
+ "epoch": 0.04720850681814664,
+ "grad_norm": 22144.51953125,
+ "learning_rate": 9.149e-05,
+ "loss": 0.5879,
+ "step": 9150
+ },
+ {
+ "epoch": 0.047466476800759466,
+ "grad_norm": 20656.185546875,
+ "learning_rate": 9.199e-05,
+ "loss": 0.5806,
+ "step": 9200
+ },
+ {
+ "epoch": 0.04772444678337229,
+ "grad_norm": 21228.814453125,
+ "learning_rate": 9.249e-05,
+ "loss": 0.5858,
+ "step": 9250
+ },
+ {
+ "epoch": 0.04798241676598511,
+ "grad_norm": 20801.869140625,
+ "learning_rate": 9.299e-05,
+ "loss": 0.5816,
+ "step": 9300
+ },
+ {
+ "epoch": 0.04824038674859793,
+ "grad_norm": 24044.283203125,
+ "learning_rate": 9.349e-05,
+ "loss": 0.5811,
+ "step": 9350
+ },
+ {
+ "epoch": 0.04849835673121076,
+ "grad_norm": 22395.47265625,
+ "learning_rate": 9.399e-05,
+ "loss": 0.5782,
+ "step": 9400
+ },
+ {
+ "epoch": 0.04875632671382358,
+ "grad_norm": 22353.078125,
+ "learning_rate": 9.449e-05,
+ "loss": 0.5758,
+ "step": 9450
+ },
+ {
+ "epoch": 0.0490142966964364,
+ "grad_norm": 22520.72265625,
+ "learning_rate": 9.499e-05,
+ "loss": 0.5752,
+ "step": 9500
+ },
+ {
+ "epoch": 0.04927226667904923,
+ "grad_norm": 22016.951171875,
+ "learning_rate": 9.549e-05,
+ "loss": 0.5764,
+ "step": 9550
+ },
+ {
+ "epoch": 0.04953023666166205,
+ "grad_norm": 20046.615234375,
+ "learning_rate": 9.599000000000001e-05,
+ "loss": 0.5759,
+ "step": 9600
+ },
+ {
+ "epoch": 0.04978820664427487,
+ "grad_norm": 21346.029296875,
+ "learning_rate": 9.649e-05,
+ "loss": 0.5798,
+ "step": 9650
+ },
+ {
+ "epoch": 0.05004617662688769,
+ "grad_norm": 22449.796875,
+ "learning_rate": 9.699e-05,
+ "loss": 0.5829,
+ "step": 9700
+ },
+ {
+ "epoch": 0.05030414660950052,
+ "grad_norm": 20538.751953125,
+ "learning_rate": 9.749e-05,
+ "loss": 0.5809,
+ "step": 9750
+ },
+ {
+ "epoch": 0.05056211659211334,
+ "grad_norm": 21123.19921875,
+ "learning_rate": 9.799e-05,
+ "loss": 0.5726,
+ "step": 9800
+ },
+ {
+ "epoch": 0.05082008657472616,
+ "grad_norm": 20853.08203125,
+ "learning_rate": 9.849e-05,
+ "loss": 0.5726,
+ "step": 9850
+ },
+ {
+ "epoch": 0.05107805655733899,
+ "grad_norm": 22160.841796875,
+ "learning_rate": 9.899e-05,
+ "loss": 0.5783,
+ "step": 9900
+ },
+ {
+ "epoch": 0.05133602653995181,
+ "grad_norm": 19711.109375,
+ "learning_rate": 9.949000000000001e-05,
+ "loss": 0.5722,
+ "step": 9950
+ },
+ {
+ "epoch": 0.051593996522564634,
+ "grad_norm": 21442.310546875,
+ "learning_rate": 9.999000000000001e-05,
+ "loss": 0.5773,
+ "step": 10000
+ },
+ {
+ "epoch": 0.051593996522564634,
+ "eval_loss": 0.5661358833312988,
+ "eval_runtime": 3272.6524,
+ "eval_samples_per_second": 947.586,
+ "eval_steps_per_second": 1.851,
+ "step": 10000
+ },
+ {
+ "epoch": 0.051851966505177455,
+ "grad_norm": 21442.943359375,
+ "learning_rate": 9.999998718392692e-05,
+ "loss": 0.5727,
+ "step": 10050
+ },
+ {
+ "epoch": 0.05210993648779028,
+ "grad_norm": 21711.177734375,
+ "learning_rate": 9.999994768416664e-05,
+ "loss": 0.5707,
+ "step": 10100
+ },
+ {
+ "epoch": 0.052367906470403104,
+ "grad_norm": 21793.666015625,
+ "learning_rate": 9.999988149540251e-05,
+ "loss": 0.5727,
+ "step": 10150
+ },
+ {
+ "epoch": 0.052625876453015925,
+ "grad_norm": 18847.970703125,
+ "learning_rate": 9.999978861766983e-05,
+ "loss": 0.5726,
+ "step": 10200
+ },
+ {
+ "epoch": 0.052883846435628754,
+ "grad_norm": 22870.91796875,
+ "learning_rate": 9.999966905101816e-05,
+ "loss": 0.5751,
+ "step": 10250
+ },
+ {
+ "epoch": 0.053141816418241575,
+ "grad_norm": 23970.431640625,
+ "learning_rate": 9.999952279551135e-05,
+ "loss": 0.5745,
+ "step": 10300
+ },
+ {
+ "epoch": 0.053399786400854396,
+ "grad_norm": 19482.65625,
+ "learning_rate": 9.999934985122746e-05,
+ "loss": 0.5734,
+ "step": 10350
+ },
+ {
+ "epoch": 0.05365775638346722,
+ "grad_norm": 19720.65625,
+ "learning_rate": 9.999915021825879e-05,
+ "loss": 0.5697,
+ "step": 10400
+ },
+ {
+ "epoch": 0.053915726366080045,
+ "grad_norm": 21484.8203125,
+ "learning_rate": 9.99989238967119e-05,
+ "loss": 0.5678,
+ "step": 10450
+ },
+ {
+ "epoch": 0.05417369634869287,
+ "grad_norm": 20198.669921875,
+ "learning_rate": 9.999867088670762e-05,
+ "loss": 0.5731,
+ "step": 10500
+ },
+ {
+ "epoch": 0.05443166633130569,
+ "grad_norm": 19887.86328125,
+ "learning_rate": 9.999839118838099e-05,
+ "loss": 0.5711,
+ "step": 10550
+ },
+ {
+ "epoch": 0.05468963631391851,
+ "grad_norm": 21250.41796875,
+ "learning_rate": 9.999808480188131e-05,
+ "loss": 0.5653,
+ "step": 10600
+ },
+ {
+ "epoch": 0.05494760629653134,
+ "grad_norm": 21179.904296875,
+ "learning_rate": 9.999775172737211e-05,
+ "loss": 0.5666,
+ "step": 10650
+ },
+ {
+ "epoch": 0.05520557627914416,
+ "grad_norm": 21106.083984375,
+ "learning_rate": 9.999739196503119e-05,
+ "loss": 0.5656,
+ "step": 10700
+ },
+ {
+ "epoch": 0.05546354626175698,
+ "grad_norm": 19393.994140625,
+ "learning_rate": 9.999700551505057e-05,
+ "loss": 0.566,
+ "step": 10750
+ },
+ {
+ "epoch": 0.05572151624436981,
+ "grad_norm": 22788.060546875,
+ "learning_rate": 9.999659237763656e-05,
+ "loss": 0.5681,
+ "step": 10800
+ },
+ {
+ "epoch": 0.05597948622698263,
+ "grad_norm": 20106.75390625,
+ "learning_rate": 9.999615255300966e-05,
+ "loss": 0.5668,
+ "step": 10850
+ },
+ {
+ "epoch": 0.05623745620959545,
+ "grad_norm": 22390.466796875,
+ "learning_rate": 9.999568604140464e-05,
+ "loss": 0.5665,
+ "step": 10900
+ },
+ {
+ "epoch": 0.05649542619220827,
+ "grad_norm": 21145.044921875,
+ "learning_rate": 9.999519284307053e-05,
+ "loss": 0.5645,
+ "step": 10950
+ },
+ {
+ "epoch": 0.0567533961748211,
+ "grad_norm": 22501.64453125,
+ "learning_rate": 9.999467295827059e-05,
+ "loss": 0.5663,
+ "step": 11000
+ },
+ {
+ "epoch": 0.05701136615743392,
+ "grad_norm": 21079.431640625,
+ "learning_rate": 9.999412638728229e-05,
+ "loss": 0.5605,
+ "step": 11050
+ },
+ {
+ "epoch": 0.05726933614004674,
+ "grad_norm": 21501.4375,
+ "learning_rate": 9.999355313039742e-05,
+ "loss": 0.5643,
+ "step": 11100
+ },
+ {
+ "epoch": 0.05752730612265957,
+ "grad_norm": 22092.6328125,
+ "learning_rate": 9.999295318792194e-05,
+ "loss": 0.5602,
+ "step": 11150
+ },
+ {
+ "epoch": 0.05778527610527239,
+ "grad_norm": 19948.81640625,
+ "learning_rate": 9.999232656017613e-05,
+ "loss": 0.5649,
+ "step": 11200
+ },
+ {
+ "epoch": 0.05804324608788521,
+ "grad_norm": 20543.5859375,
+ "learning_rate": 9.999167324749443e-05,
+ "loss": 0.5598,
+ "step": 11250
+ },
+ {
+ "epoch": 0.058301216070498034,
+ "grad_norm": 20948.060546875,
+ "learning_rate": 9.99909932502256e-05,
+ "loss": 0.5631,
+ "step": 11300
+ },
+ {
+ "epoch": 0.05855918605311086,
+ "grad_norm": 20384.732421875,
+ "learning_rate": 9.999028656873257e-05,
+ "loss": 0.5592,
+ "step": 11350
+ },
+ {
+ "epoch": 0.05881715603572368,
+ "grad_norm": 20027.615234375,
+ "learning_rate": 9.99895532033926e-05,
+ "loss": 0.5658,
+ "step": 11400
+ },
+ {
+ "epoch": 0.059075126018336505,
+ "grad_norm": 20702.263671875,
+ "learning_rate": 9.99887931545971e-05,
+ "loss": 0.56,
+ "step": 11450
+ },
+ {
+ "epoch": 0.05933309600094933,
+ "grad_norm": 21589.52734375,
+ "learning_rate": 9.99880064227518e-05,
+ "loss": 0.5595,
+ "step": 11500
+ },
+ {
+ "epoch": 0.059591065983562154,
+ "grad_norm": 20375.181640625,
+ "learning_rate": 9.998719300827663e-05,
+ "loss": 0.5627,
+ "step": 11550
+ },
+ {
+ "epoch": 0.059849035966174975,
+ "grad_norm": 20207.677734375,
+ "learning_rate": 9.998635291160577e-05,
+ "loss": 0.5615,
+ "step": 11600
+ },
+ {
+ "epoch": 0.060107005948787796,
+ "grad_norm": 20898.291015625,
+ "learning_rate": 9.998548613318767e-05,
+ "loss": 0.5594,
+ "step": 11650
+ },
+ {
+ "epoch": 0.060364975931400625,
+ "grad_norm": 20133.822265625,
+ "learning_rate": 9.998459267348497e-05,
+ "loss": 0.5631,
+ "step": 11700
+ },
+ {
+ "epoch": 0.060622945914013446,
+ "grad_norm": 19021.533203125,
+ "learning_rate": 9.99836725329746e-05,
+ "loss": 0.5576,
+ "step": 11750
+ },
+ {
+ "epoch": 0.06088091589662627,
+ "grad_norm": 19088.32421875,
+ "learning_rate": 9.998272571214772e-05,
+ "loss": 0.5619,
+ "step": 11800
+ },
+ {
+ "epoch": 0.061138885879239095,
+ "grad_norm": 19742.841796875,
+ "learning_rate": 9.99817522115097e-05,
+ "loss": 0.5626,
+ "step": 11850
+ },
+ {
+ "epoch": 0.061396855861851916,
+ "grad_norm": 21584.271484375,
+ "learning_rate": 9.99807520315802e-05,
+ "loss": 0.555,
+ "step": 11900
+ },
+ {
+ "epoch": 0.06165482584446474,
+ "grad_norm": 19766.76953125,
+ "learning_rate": 9.997972517289309e-05,
+ "loss": 0.5584,
+ "step": 11950
+ },
+ {
+ "epoch": 0.06191279582707756,
+ "grad_norm": 19821.556640625,
+ "learning_rate": 9.997867163599646e-05,
+ "loss": 0.5623,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06217076580969039,
+ "grad_norm": 19488.490234375,
+ "learning_rate": 9.997759142145271e-05,
+ "loss": 0.5591,
+ "step": 12050
+ },
+ {
+ "epoch": 0.06242873579230321,
+ "grad_norm": 20093.806640625,
+ "learning_rate": 9.997648452983842e-05,
+ "loss": 0.5597,
+ "step": 12100
+ },
+ {
+ "epoch": 0.06268670577491603,
+ "grad_norm": 20202.154296875,
+ "learning_rate": 9.997535096174441e-05,
+ "loss": 0.5542,
+ "step": 12150
+ },
+ {
+ "epoch": 0.06294467575752885,
+ "grad_norm": 19978.154296875,
+ "learning_rate": 9.99741907177758e-05,
+ "loss": 0.5629,
+ "step": 12200
+ },
+ {
+ "epoch": 0.06320264574014167,
+ "grad_norm": 19697.005859375,
+ "learning_rate": 9.997300379855186e-05,
+ "loss": 0.5571,
+ "step": 12250
+ },
+ {
+ "epoch": 0.06346061572275451,
+ "grad_norm": 20384.287109375,
+ "learning_rate": 9.997179020470618e-05,
+ "loss": 0.5526,
+ "step": 12300
+ },
+ {
+ "epoch": 0.06371858570536733,
+ "grad_norm": 18652.044921875,
+ "learning_rate": 9.997054993688651e-05,
+ "loss": 0.5531,
+ "step": 12350
+ },
+ {
+ "epoch": 0.06397655568798015,
+ "grad_norm": 20133.990234375,
+ "learning_rate": 9.996928299575493e-05,
+ "loss": 0.5561,
+ "step": 12400
+ },
+ {
+ "epoch": 0.06423452567059297,
+ "grad_norm": 20575.875,
+ "learning_rate": 9.996798938198766e-05,
+ "loss": 0.5559,
+ "step": 12450
+ },
+ {
+ "epoch": 0.06449249565320579,
+ "grad_norm": 19524.828125,
+ "learning_rate": 9.996666909627525e-05,
+ "loss": 0.5437,
+ "step": 12500
+ },
+ {
+ "epoch": 0.06475046563581861,
+ "grad_norm": 22106.927734375,
+ "learning_rate": 9.996532213932242e-05,
+ "loss": 0.5691,
+ "step": 12550
+ },
+ {
+ "epoch": 0.06500843561843143,
+ "grad_norm": 18443.4609375,
+ "learning_rate": 9.996394851184814e-05,
+ "loss": 0.553,
+ "step": 12600
+ },
+ {
+ "epoch": 0.06526640560104426,
+ "grad_norm": 21786.943359375,
+ "learning_rate": 9.996254821458565e-05,
+ "loss": 0.562,
+ "step": 12650
+ },
+ {
+ "epoch": 0.06552437558365709,
+ "grad_norm": 22699.578125,
+ "learning_rate": 9.996112124828241e-05,
+ "loss": 0.5526,
+ "step": 12700
+ },
+ {
+ "epoch": 0.06578234556626991,
+ "grad_norm": 18522.822265625,
+ "learning_rate": 9.995966761370006e-05,
+ "loss": 0.5525,
+ "step": 12750
+ },
+ {
+ "epoch": 0.06604031554888273,
+ "grad_norm": 19723.44140625,
+ "learning_rate": 9.995818731161458e-05,
+ "loss": 0.5555,
+ "step": 12800
+ },
+ {
+ "epoch": 0.06629828553149555,
+ "grad_norm": 20643.173828125,
+ "learning_rate": 9.995668034281606e-05,
+ "loss": 0.5506,
+ "step": 12850
+ },
+ {
+ "epoch": 0.06655625551410838,
+ "grad_norm": 19303.68359375,
+ "learning_rate": 9.995514670810896e-05,
+ "loss": 0.5599,
+ "step": 12900
+ },
+ {
+ "epoch": 0.0668142254967212,
+ "grad_norm": 19837.240234375,
+ "learning_rate": 9.995358640831187e-05,
+ "loss": 0.5514,
+ "step": 12950
+ },
+ {
+ "epoch": 0.06707219547933402,
+ "grad_norm": 19212.25390625,
+ "learning_rate": 9.995199944425764e-05,
+ "loss": 0.5542,
+ "step": 13000
+ },
+ {
+ "epoch": 0.06733016546194685,
+ "grad_norm": 19908.70703125,
+ "learning_rate": 9.995038581679337e-05,
+ "loss": 0.5421,
+ "step": 13050
+ },
+ {
+ "epoch": 0.06758813544455967,
+ "grad_norm": 18933.306640625,
+ "learning_rate": 9.994874552678038e-05,
+ "loss": 0.549,
+ "step": 13100
+ },
+ {
+ "epoch": 0.0678461054271725,
+ "grad_norm": 19313.990234375,
+ "learning_rate": 9.994707857509422e-05,
+ "loss": 0.5569,
+ "step": 13150
+ },
+ {
+ "epoch": 0.06810407540978532,
+ "grad_norm": 20800.984375,
+ "learning_rate": 9.99453849626247e-05,
+ "loss": 0.5518,
+ "step": 13200
+ },
+ {
+ "epoch": 0.06836204539239814,
+ "grad_norm": 18623.361328125,
+ "learning_rate": 9.994366469027583e-05,
+ "loss": 0.5549,
+ "step": 13250
+ },
+ {
+ "epoch": 0.06862001537501096,
+ "grad_norm": 19761.654296875,
+ "learning_rate": 9.994191775896584e-05,
+ "loss": 0.5467,
+ "step": 13300
+ },
+ {
+ "epoch": 0.06887798535762378,
+ "grad_norm": 20618.501953125,
+ "learning_rate": 9.994014416962723e-05,
+ "loss": 0.5554,
+ "step": 13350
+ },
+ {
+ "epoch": 0.06913595534023662,
+ "grad_norm": 19279.791015625,
+ "learning_rate": 9.993834392320668e-05,
+ "loss": 0.5567,
+ "step": 13400
+ },
+ {
+ "epoch": 0.06939392532284944,
+ "grad_norm": 18802.34375,
+ "learning_rate": 9.993651702066516e-05,
+ "loss": 0.5608,
+ "step": 13450
+ },
+ {
+ "epoch": 0.06965189530546226,
+ "grad_norm": 20132.15625,
+ "learning_rate": 9.993466346297779e-05,
+ "loss": 0.547,
+ "step": 13500
+ },
+ {
+ "epoch": 0.06990986528807508,
+ "grad_norm": 19165.26171875,
+ "learning_rate": 9.993278325113403e-05,
+ "loss": 0.5485,
+ "step": 13550
+ },
+ {
+ "epoch": 0.0701678352706879,
+ "grad_norm": 18493.01171875,
+ "learning_rate": 9.993087638613743e-05,
+ "loss": 0.5455,
+ "step": 13600
+ },
+ {
+ "epoch": 0.07042580525330072,
+ "grad_norm": 18225.78125,
+ "learning_rate": 9.992894286900589e-05,
+ "loss": 0.5499,
+ "step": 13650
+ },
+ {
+ "epoch": 0.07068377523591354,
+ "grad_norm": 20189.802734375,
+ "learning_rate": 9.992698270077146e-05,
+ "loss": 0.5468,
+ "step": 13700
+ },
+ {
+ "epoch": 0.07094174521852638,
+ "grad_norm": 20861.2734375,
+ "learning_rate": 9.992499588248043e-05,
+ "loss": 0.5588,
+ "step": 13750
+ },
+ {
+ "epoch": 0.0711997152011392,
+ "grad_norm": 19876.689453125,
+ "learning_rate": 9.992298241519335e-05,
+ "loss": 0.5486,
+ "step": 13800
+ },
+ {
+ "epoch": 0.07145768518375202,
+ "grad_norm": 18371.142578125,
+ "learning_rate": 9.992094229998497e-05,
+ "loss": 0.5475,
+ "step": 13850
+ },
+ {
+ "epoch": 0.07171565516636484,
+ "grad_norm": 18274.396484375,
+ "learning_rate": 9.991887553794423e-05,
+ "loss": 0.549,
+ "step": 13900
+ },
+ {
+ "epoch": 0.07197362514897766,
+ "grad_norm": 18204.947265625,
+ "learning_rate": 9.991678213017437e-05,
+ "loss": 0.5419,
+ "step": 13950
+ },
+ {
+ "epoch": 0.07223159513159048,
+ "grad_norm": 18634.162109375,
+ "learning_rate": 9.991466207779278e-05,
+ "loss": 0.5528,
+ "step": 14000
+ },
+ {
+ "epoch": 0.0724895651142033,
+ "grad_norm": 21840.685546875,
+ "learning_rate": 9.991251538193112e-05,
+ "loss": 0.5492,
+ "step": 14050
+ },
+ {
+ "epoch": 0.07274753509681614,
+ "grad_norm": 18888.935546875,
+ "learning_rate": 9.991034204373524e-05,
+ "loss": 0.5504,
+ "step": 14100
+ },
+ {
+ "epoch": 0.07300550507942896,
+ "grad_norm": 19353.263671875,
+ "learning_rate": 9.990814206436524e-05,
+ "loss": 0.5425,
+ "step": 14150
+ },
+ {
+ "epoch": 0.07326347506204178,
+ "grad_norm": 18891.79296875,
+ "learning_rate": 9.990591544499543e-05,
+ "loss": 0.551,
+ "step": 14200
+ },
+ {
+ "epoch": 0.0735214450446546,
+ "grad_norm": 17878.33203125,
+ "learning_rate": 9.99036621868143e-05,
+ "loss": 0.5403,
+ "step": 14250
+ },
+ {
+ "epoch": 0.07377941502726743,
+ "grad_norm": 18997.544921875,
+ "learning_rate": 9.990138229102465e-05,
+ "loss": 0.5458,
+ "step": 14300
+ },
+ {
+ "epoch": 0.07403738500988025,
+ "grad_norm": 22162.03125,
+ "learning_rate": 9.989907575884341e-05,
+ "loss": 0.5482,
+ "step": 14350
+ },
+ {
+ "epoch": 0.07429535499249307,
+ "grad_norm": 17026.828125,
+ "learning_rate": 9.989674259150177e-05,
+ "loss": 0.5487,
+ "step": 14400
+ },
+ {
+ "epoch": 0.0745533249751059,
+ "grad_norm": 18335.169921875,
+ "learning_rate": 9.989438279024513e-05,
+ "loss": 0.5459,
+ "step": 14450
+ },
+ {
+ "epoch": 0.07481129495771872,
+ "grad_norm": 19508.666015625,
+ "learning_rate": 9.989199635633309e-05,
+ "loss": 0.5456,
+ "step": 14500
+ },
+ {
+ "epoch": 0.07506926494033155,
+ "grad_norm": 20281.28515625,
+ "learning_rate": 9.98895832910395e-05,
+ "loss": 0.5455,
+ "step": 14550
+ },
+ {
+ "epoch": 0.07532723492294437,
+ "grad_norm": 20196.259765625,
+ "learning_rate": 9.98871435956524e-05,
+ "loss": 0.5474,
+ "step": 14600
+ },
+ {
+ "epoch": 0.07558520490555719,
+ "grad_norm": 18934.544921875,
+ "learning_rate": 9.988467727147409e-05,
+ "loss": 0.546,
+ "step": 14650
+ },
+ {
+ "epoch": 0.07584317488817001,
+ "grad_norm": 20257.126953125,
+ "learning_rate": 9.988218431982098e-05,
+ "loss": 0.5443,
+ "step": 14700
+ },
+ {
+ "epoch": 0.07610114487078283,
+ "grad_norm": 20330.86328125,
+ "learning_rate": 9.98796647420238e-05,
+ "loss": 0.5423,
+ "step": 14750
+ },
+ {
+ "epoch": 0.07635911485339567,
+ "grad_norm": 19077.765625,
+ "learning_rate": 9.987711853942745e-05,
+ "loss": 0.5446,
+ "step": 14800
+ },
+ {
+ "epoch": 0.07661708483600849,
+ "grad_norm": 20855.169921875,
+ "learning_rate": 9.987454571339103e-05,
+ "loss": 0.5427,
+ "step": 14850
+ },
+ {
+ "epoch": 0.07687505481862131,
+ "grad_norm": 20556.005859375,
+ "learning_rate": 9.987194626528788e-05,
+ "loss": 0.5417,
+ "step": 14900
+ },
+ {
+ "epoch": 0.07713302480123413,
+ "grad_norm": 19028.7421875,
+ "learning_rate": 9.986932019650553e-05,
+ "loss": 0.5412,
+ "step": 14950
+ },
+ {
+ "epoch": 0.07739099478384695,
+ "grad_norm": 18669.166015625,
+ "learning_rate": 9.986666750844572e-05,
+ "loss": 0.5404,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07739099478384695,
+ "eval_loss": 0.5350670218467712,
+ "eval_runtime": 3217.7876,
+ "eval_samples_per_second": 963.743,
+ "eval_steps_per_second": 1.882,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07764896476645977,
+ "grad_norm": 19965.779296875,
+ "learning_rate": 9.98639882025244e-05,
+ "loss": 0.5439,
+ "step": 15050
+ },
+ {
+ "epoch": 0.07790693474907259,
+ "grad_norm": 18329.9921875,
+ "learning_rate": 9.986128228017173e-05,
+ "loss": 0.5425,
+ "step": 15100
+ },
+ {
+ "epoch": 0.07816490473168543,
+ "grad_norm": 20102.005859375,
+ "learning_rate": 9.985854974283211e-05,
+ "loss": 0.5444,
+ "step": 15150
+ },
+ {
+ "epoch": 0.07842287471429825,
+ "grad_norm": 19234.671875,
+ "learning_rate": 9.985579059196406e-05,
+ "loss": 0.5443,
+ "step": 15200
+ },
+ {
+ "epoch": 0.07868084469691107,
+ "grad_norm": 18324.298828125,
+ "learning_rate": 9.985300482904041e-05,
+ "loss": 0.5419,
+ "step": 15250
+ },
+ {
+ "epoch": 0.07893881467952389,
+ "grad_norm": 18766.2734375,
+ "learning_rate": 9.985019245554814e-05,
+ "loss": 0.5412,
+ "step": 15300
+ },
+ {
+ "epoch": 0.07919678466213671,
+ "grad_norm": 18805.765625,
+ "learning_rate": 9.984735347298841e-05,
+ "loss": 0.5443,
+ "step": 15350
+ },
+ {
+ "epoch": 0.07945475464474953,
+ "grad_norm": 17677.30078125,
+ "learning_rate": 9.984448788287665e-05,
+ "loss": 0.5421,
+ "step": 15400
+ },
+ {
+ "epoch": 0.07971272462736236,
+ "grad_norm": 19851.3515625,
+ "learning_rate": 9.984159568674243e-05,
+ "loss": 0.5426,
+ "step": 15450
+ },
+ {
+ "epoch": 0.07997069460997518,
+ "grad_norm": 18453.05859375,
+ "learning_rate": 9.983867688612956e-05,
+ "loss": 0.5445,
+ "step": 15500
+ },
+ {
+ "epoch": 0.08022866459258801,
+ "grad_norm": 17366.869140625,
+ "learning_rate": 9.983573148259603e-05,
+ "loss": 0.5451,
+ "step": 15550
+ },
+ {
+ "epoch": 0.08048663457520083,
+ "grad_norm": 18628.716796875,
+ "learning_rate": 9.983275947771407e-05,
+ "loss": 0.5373,
+ "step": 15600
+ },
+ {
+ "epoch": 0.08074460455781365,
+ "grad_norm": 19403.87890625,
+ "learning_rate": 9.982976087307003e-05,
+ "loss": 0.5489,
+ "step": 15650
+ },
+ {
+ "epoch": 0.08100257454042648,
+ "grad_norm": 18485.71875,
+ "learning_rate": 9.982673567026455e-05,
+ "loss": 0.538,
+ "step": 15700
+ },
+ {
+ "epoch": 0.0812605445230393,
+ "grad_norm": 19837.1796875,
+ "learning_rate": 9.982368387091241e-05,
+ "loss": 0.5356,
+ "step": 15750
+ },
+ {
+ "epoch": 0.08151851450565212,
+ "grad_norm": 19505.34375,
+ "learning_rate": 9.982060547664258e-05,
+ "loss": 0.5356,
+ "step": 15800
+ },
+ {
+ "epoch": 0.08177648448826494,
+ "grad_norm": 18645.48828125,
+ "learning_rate": 9.981750048909828e-05,
+ "loss": 0.5381,
+ "step": 15850
+ },
+ {
+ "epoch": 0.08203445447087777,
+ "grad_norm": 20191.73828125,
+ "learning_rate": 9.981436890993689e-05,
+ "loss": 0.535,
+ "step": 15900
+ },
+ {
+ "epoch": 0.0822924244534906,
+ "grad_norm": 18908.15625,
+ "learning_rate": 9.981121074082995e-05,
+ "loss": 0.5405,
+ "step": 15950
+ },
+ {
+ "epoch": 0.08255039443610342,
+ "grad_norm": 19517.73828125,
+ "learning_rate": 9.980802598346326e-05,
+ "loss": 0.5407,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08280836441871624,
+ "grad_norm": 18368.16015625,
+ "learning_rate": 9.980481463953679e-05,
+ "loss": 0.5391,
+ "step": 16050
+ },
+ {
+ "epoch": 0.08306633440132906,
+ "grad_norm": 19727.35546875,
+ "learning_rate": 9.980157671076466e-05,
+ "loss": 0.537,
+ "step": 16100
+ },
+ {
+ "epoch": 0.08332430438394188,
+ "grad_norm": 20757.890625,
+ "learning_rate": 9.979831219887525e-05,
+ "loss": 0.5408,
+ "step": 16150
+ },
+ {
+ "epoch": 0.0835822743665547,
+ "grad_norm": 19334.708984375,
+ "learning_rate": 9.979502110561108e-05,
+ "loss": 0.5371,
+ "step": 16200
+ },
+ {
+ "epoch": 0.08384024434916754,
+ "grad_norm": 19338.498046875,
+ "learning_rate": 9.979170343272886e-05,
+ "loss": 0.531,
+ "step": 16250
+ },
+ {
+ "epoch": 0.08409821433178036,
+ "grad_norm": 18722.365234375,
+ "learning_rate": 9.978835918199949e-05,
+ "loss": 0.5398,
+ "step": 16300
+ },
+ {
+ "epoch": 0.08435618431439318,
+ "grad_norm": 18026.109375,
+ "learning_rate": 9.97849883552081e-05,
+ "loss": 0.5423,
+ "step": 16350
+ },
+ {
+ "epoch": 0.084614154297006,
+ "grad_norm": 19646.78125,
+ "learning_rate": 9.978159095415396e-05,
+ "loss": 0.5387,
+ "step": 16400
+ },
+ {
+ "epoch": 0.08487212427961882,
+ "grad_norm": 20091.552734375,
+ "learning_rate": 9.977816698065052e-05,
+ "loss": 0.5376,
+ "step": 16450
+ },
+ {
+ "epoch": 0.08513009426223164,
+ "grad_norm": 20539.73046875,
+ "learning_rate": 9.977471643652546e-05,
+ "loss": 0.5333,
+ "step": 16500
+ },
+ {
+ "epoch": 0.08538806424484446,
+ "grad_norm": 18306.24609375,
+ "learning_rate": 9.977123932362059e-05,
+ "loss": 0.5405,
+ "step": 16550
+ },
+ {
+ "epoch": 0.0856460342274573,
+ "grad_norm": 20133.513671875,
+ "learning_rate": 9.976773564379193e-05,
+ "loss": 0.541,
+ "step": 16600
+ },
+ {
+ "epoch": 0.08590400421007012,
+ "grad_norm": 19533.50390625,
+ "learning_rate": 9.976420539890969e-05,
+ "loss": 0.5333,
+ "step": 16650
+ },
+ {
+ "epoch": 0.08616197419268294,
+ "grad_norm": 19509.087890625,
+ "learning_rate": 9.976064859085822e-05,
+ "loss": 0.5347,
+ "step": 16700
+ },
+ {
+ "epoch": 0.08641994417529576,
+ "grad_norm": 19590.818359375,
+ "learning_rate": 9.97570652215361e-05,
+ "loss": 0.5377,
+ "step": 16750
+ },
+ {
+ "epoch": 0.08667791415790858,
+ "grad_norm": 19510.705078125,
+ "learning_rate": 9.975345529285605e-05,
+ "loss": 0.5367,
+ "step": 16800
+ },
+ {
+ "epoch": 0.0869358841405214,
+ "grad_norm": 20015.8046875,
+ "learning_rate": 9.974981880674499e-05,
+ "loss": 0.5386,
+ "step": 16850
+ },
+ {
+ "epoch": 0.08719385412313423,
+ "grad_norm": 18704.03125,
+ "learning_rate": 9.974615576514399e-05,
+ "loss": 0.5361,
+ "step": 16900
+ },
+ {
+ "epoch": 0.08745182410574706,
+ "grad_norm": 18257.869140625,
+ "learning_rate": 9.974246617000832e-05,
+ "loss": 0.5304,
+ "step": 16950
+ },
+ {
+ "epoch": 0.08770979408835988,
+ "grad_norm": 18150.517578125,
+ "learning_rate": 9.973875002330743e-05,
+ "loss": 0.5289,
+ "step": 17000
+ },
+ {
+ "epoch": 0.0879677640709727,
+ "grad_norm": 18326.041015625,
+ "learning_rate": 9.97350073270249e-05,
+ "loss": 0.5347,
+ "step": 17050
+ },
+ {
+ "epoch": 0.08822573405358553,
+ "grad_norm": 18199.224609375,
+ "learning_rate": 9.973123808315852e-05,
+ "loss": 0.5269,
+ "step": 17100
+ },
+ {
+ "epoch": 0.08848370403619835,
+ "grad_norm": 20351.447265625,
+ "learning_rate": 9.972744229372025e-05,
+ "loss": 0.5334,
+ "step": 17150
+ },
+ {
+ "epoch": 0.08874167401881117,
+ "grad_norm": 19200.703125,
+ "learning_rate": 9.97236199607362e-05,
+ "loss": 0.5316,
+ "step": 17200
+ },
+ {
+ "epoch": 0.08899964400142399,
+ "grad_norm": 18855.7890625,
+ "learning_rate": 9.971977108624664e-05,
+ "loss": 0.5342,
+ "step": 17250
+ },
+ {
+ "epoch": 0.08925761398403682,
+ "grad_norm": 18889.56640625,
+ "learning_rate": 9.971589567230606e-05,
+ "loss": 0.5361,
+ "step": 17300
+ },
+ {
+ "epoch": 0.08951558396664965,
+ "grad_norm": 18003.9921875,
+ "learning_rate": 9.971199372098304e-05,
+ "loss": 0.5353,
+ "step": 17350
+ },
+ {
+ "epoch": 0.08977355394926247,
+ "grad_norm": 19555.30078125,
+ "learning_rate": 9.970806523436041e-05,
+ "loss": 0.5306,
+ "step": 17400
+ },
+ {
+ "epoch": 0.09003152393187529,
+ "grad_norm": 19433.37890625,
+ "learning_rate": 9.97041102145351e-05,
+ "loss": 0.5341,
+ "step": 17450
+ },
+ {
+ "epoch": 0.09028949391448811,
+ "grad_norm": 19238.341796875,
+ "learning_rate": 9.97001286636182e-05,
+ "loss": 0.5372,
+ "step": 17500
+ },
+ {
+ "epoch": 0.09054746389710093,
+ "grad_norm": 18698.78125,
+ "learning_rate": 9.969612058373502e-05,
+ "loss": 0.5356,
+ "step": 17550
+ },
+ {
+ "epoch": 0.09080543387971375,
+ "grad_norm": 17953.580078125,
+ "learning_rate": 9.969208597702497e-05,
+ "loss": 0.529,
+ "step": 17600
+ },
+ {
+ "epoch": 0.09106340386232659,
+ "grad_norm": 17678.716796875,
+ "learning_rate": 9.968802484564168e-05,
+ "loss": 0.5329,
+ "step": 17650
+ },
+ {
+ "epoch": 0.09132137384493941,
+ "grad_norm": 20412.287109375,
+ "learning_rate": 9.968393719175286e-05,
+ "loss": 0.534,
+ "step": 17700
+ },
+ {
+ "epoch": 0.09157934382755223,
+ "grad_norm": 20080.16015625,
+ "learning_rate": 9.967982301754044e-05,
+ "loss": 0.5307,
+ "step": 17750
+ },
+ {
+ "epoch": 0.09183731381016505,
+ "grad_norm": 18570.314453125,
+ "learning_rate": 9.96756823252005e-05,
+ "loss": 0.526,
+ "step": 17800
+ },
+ {
+ "epoch": 0.09209528379277787,
+ "grad_norm": 18329.107421875,
+ "learning_rate": 9.967151511694324e-05,
+ "loss": 0.5273,
+ "step": 17850
+ },
+ {
+ "epoch": 0.09235325377539069,
+ "grad_norm": 19036.18359375,
+ "learning_rate": 9.966732139499304e-05,
+ "loss": 0.5275,
+ "step": 17900
+ },
+ {
+ "epoch": 0.09261122375800351,
+ "grad_norm": 18708.826171875,
+ "learning_rate": 9.966310116158844e-05,
+ "loss": 0.5313,
+ "step": 17950
+ },
+ {
+ "epoch": 0.09286919374061635,
+ "grad_norm": 18660.791015625,
+ "learning_rate": 9.96588544189821e-05,
+ "loss": 0.5303,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09312716372322917,
+ "grad_norm": 19709.181640625,
+ "learning_rate": 9.965458116944086e-05,
+ "loss": 0.5347,
+ "step": 18050
+ },
+ {
+ "epoch": 0.09338513370584199,
+ "grad_norm": 19683.798828125,
+ "learning_rate": 9.96502814152457e-05,
+ "loss": 0.5359,
+ "step": 18100
+ },
+ {
+ "epoch": 0.09364310368845481,
+ "grad_norm": 19533.09765625,
+ "learning_rate": 9.964595515869175e-05,
+ "loss": 0.5263,
+ "step": 18150
+ },
+ {
+ "epoch": 0.09390107367106763,
+ "grad_norm": 20254.892578125,
+ "learning_rate": 9.964160240208826e-05,
+ "loss": 0.5307,
+ "step": 18200
+ },
+ {
+ "epoch": 0.09415904365368045,
+ "grad_norm": 21316.876953125,
+ "learning_rate": 9.963722314775868e-05,
+ "loss": 0.5316,
+ "step": 18250
+ },
+ {
+ "epoch": 0.09441701363629328,
+ "grad_norm": 20027.03515625,
+ "learning_rate": 9.963281739804054e-05,
+ "loss": 0.5274,
+ "step": 18300
+ },
+ {
+ "epoch": 0.0946749836189061,
+ "grad_norm": 18551.994140625,
+ "learning_rate": 9.962838515528554e-05,
+ "loss": 0.5339,
+ "step": 18350
+ },
+ {
+ "epoch": 0.09493295360151893,
+ "grad_norm": 17779.97265625,
+ "learning_rate": 9.962392642185956e-05,
+ "loss": 0.5301,
+ "step": 18400
+ },
+ {
+ "epoch": 0.09519092358413175,
+ "grad_norm": 20620.232421875,
+ "learning_rate": 9.961944120014256e-05,
+ "loss": 0.522,
+ "step": 18450
+ },
+ {
+ "epoch": 0.09544889356674457,
+ "grad_norm": 18669.73046875,
+ "learning_rate": 9.961492949252868e-05,
+ "loss": 0.5261,
+ "step": 18500
+ },
+ {
+ "epoch": 0.0957068635493574,
+ "grad_norm": 19528.4765625,
+ "learning_rate": 9.961039130142617e-05,
+ "loss": 0.5276,
+ "step": 18550
+ },
+ {
+ "epoch": 0.09596483353197022,
+ "grad_norm": 19643.099609375,
+ "learning_rate": 9.960582662925744e-05,
+ "loss": 0.5332,
+ "step": 18600
+ },
+ {
+ "epoch": 0.09622280351458304,
+ "grad_norm": 19024.4375,
+ "learning_rate": 9.960123547845901e-05,
+ "loss": 0.529,
+ "step": 18650
+ },
+ {
+ "epoch": 0.09648077349719586,
+ "grad_norm": 20228.248046875,
+ "learning_rate": 9.959661785148155e-05,
+ "loss": 0.5322,
+ "step": 18700
+ },
+ {
+ "epoch": 0.0967387434798087,
+ "grad_norm": 20120.126953125,
+ "learning_rate": 9.959197375078986e-05,
+ "loss": 0.5256,
+ "step": 18750
+ },
+ {
+ "epoch": 0.09699671346242152,
+ "grad_norm": 19894.423828125,
+ "learning_rate": 9.95873031788629e-05,
+ "loss": 0.5257,
+ "step": 18800
+ },
+ {
+ "epoch": 0.09725468344503434,
+ "grad_norm": 18450.8671875,
+ "learning_rate": 9.958260613819367e-05,
+ "loss": 0.5268,
+ "step": 18850
+ },
+ {
+ "epoch": 0.09751265342764716,
+ "grad_norm": 22775.53125,
+ "learning_rate": 9.95778826312894e-05,
+ "loss": 0.5293,
+ "step": 18900
+ },
+ {
+ "epoch": 0.09777062341025998,
+ "grad_norm": 17769.38671875,
+ "learning_rate": 9.95731326606714e-05,
+ "loss": 0.5281,
+ "step": 18950
+ },
+ {
+ "epoch": 0.0980285933928728,
+ "grad_norm": 20731.322265625,
+ "learning_rate": 9.956835622887514e-05,
+ "loss": 0.5327,
+ "step": 19000
+ },
+ {
+ "epoch": 0.09828656337548562,
+ "grad_norm": 20059.11328125,
+ "learning_rate": 9.956355333845014e-05,
+ "loss": 0.5279,
+ "step": 19050
+ },
+ {
+ "epoch": 0.09854453335809846,
+ "grad_norm": 17477.626953125,
+ "learning_rate": 9.955872399196012e-05,
+ "loss": 0.5257,
+ "step": 19100
+ },
+ {
+ "epoch": 0.09880250334071128,
+ "grad_norm": 20293.232421875,
+ "learning_rate": 9.955386819198287e-05,
+ "loss": 0.5258,
+ "step": 19150
+ },
+ {
+ "epoch": 0.0990604733233241,
+ "grad_norm": 19330.4140625,
+ "learning_rate": 9.954898594111035e-05,
+ "loss": 0.5231,
+ "step": 19200
+ },
+ {
+ "epoch": 0.09931844330593692,
+ "grad_norm": 19410.818359375,
+ "learning_rate": 9.954407724194858e-05,
+ "loss": 0.5286,
+ "step": 19250
+ },
+ {
+ "epoch": 0.09957641328854974,
+ "grad_norm": 18320.552734375,
+ "learning_rate": 9.953914209711775e-05,
+ "loss": 0.5287,
+ "step": 19300
+ },
+ {
+ "epoch": 0.09983438327116256,
+ "grad_norm": 17585.583984375,
+ "learning_rate": 9.953418050925213e-05,
+ "loss": 0.5265,
+ "step": 19350
+ },
+ {
+ "epoch": 0.10009235325377538,
+ "grad_norm": 20318.298828125,
+ "learning_rate": 9.952919248100012e-05,
+ "loss": 0.5292,
+ "step": 19400
+ },
+ {
+ "epoch": 0.10035032323638822,
+ "grad_norm": 20239.33984375,
+ "learning_rate": 9.952417801502426e-05,
+ "loss": 0.522,
+ "step": 19450
+ },
+ {
+ "epoch": 0.10060829321900104,
+ "grad_norm": 18922.158203125,
+ "learning_rate": 9.951913711400115e-05,
+ "loss": 0.5275,
+ "step": 19500
+ },
+ {
+ "epoch": 0.10086626320161386,
+ "grad_norm": 18332.673828125,
+ "learning_rate": 9.951406978062153e-05,
+ "loss": 0.5282,
+ "step": 19550
+ },
+ {
+ "epoch": 0.10112423318422668,
+ "grad_norm": 19321.662109375,
+ "learning_rate": 9.950897601759024e-05,
+ "loss": 0.5236,
+ "step": 19600
+ },
+ {
+ "epoch": 0.1013822031668395,
+ "grad_norm": 19050.42578125,
+ "learning_rate": 9.950385582762624e-05,
+ "loss": 0.5269,
+ "step": 19650
+ },
+ {
+ "epoch": 0.10164017314945233,
+ "grad_norm": 18592.8125,
+ "learning_rate": 9.949870921346259e-05,
+ "loss": 0.5294,
+ "step": 19700
+ },
+ {
+ "epoch": 0.10189814313206515,
+ "grad_norm": 17702.080078125,
+ "learning_rate": 9.949353617784644e-05,
+ "loss": 0.5321,
+ "step": 19750
+ },
+ {
+ "epoch": 0.10215611311467798,
+ "grad_norm": 18935.71875,
+ "learning_rate": 9.948833672353907e-05,
+ "loss": 0.5279,
+ "step": 19800
+ },
+ {
+ "epoch": 0.1024140830972908,
+ "grad_norm": 19814.96484375,
+ "learning_rate": 9.948311085331585e-05,
+ "loss": 0.5174,
+ "step": 19850
+ },
+ {
+ "epoch": 0.10267205307990362,
+ "grad_norm": 18945.4375,
+ "learning_rate": 9.947785856996623e-05,
+ "loss": 0.525,
+ "step": 19900
+ },
+ {
+ "epoch": 0.10293002306251645,
+ "grad_norm": 19162.28125,
+ "learning_rate": 9.947257987629379e-05,
+ "loss": 0.5268,
+ "step": 19950
+ },
+ {
+ "epoch": 0.10318799304512927,
+ "grad_norm": 18814.861328125,
+ "learning_rate": 9.94672747751162e-05,
+ "loss": 0.5191,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10318799304512927,
+ "eval_loss": 0.5160176157951355,
+ "eval_runtime": 3272.5369,
+ "eval_samples_per_second": 947.62,
+ "eval_steps_per_second": 1.851,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10344596302774209,
+ "grad_norm": 19089.77734375,
+ "learning_rate": 9.94619432692652e-05,
+ "loss": 0.5254,
+ "step": 20050
+ },
+ {
+ "epoch": 0.10370393301035491,
+ "grad_norm": 19005.53125,
+ "learning_rate": 9.945658536158667e-05,
+ "loss": 0.525,
+ "step": 20100
+ },
+ {
+ "epoch": 0.10396190299296774,
+ "grad_norm": 20896.8125,
+ "learning_rate": 9.945120105494054e-05,
+ "loss": 0.5173,
+ "step": 20150
+ },
+ {
+ "epoch": 0.10421987297558057,
+ "grad_norm": 19254.22265625,
+ "learning_rate": 9.944579035220085e-05,
+ "loss": 0.5195,
+ "step": 20200
+ },
+ {
+ "epoch": 0.10447784295819339,
+ "grad_norm": 19317.572265625,
+ "learning_rate": 9.944035325625573e-05,
+ "loss": 0.5239,
+ "step": 20250
+ },
+ {
+ "epoch": 0.10473581294080621,
+ "grad_norm": 18661.330078125,
+ "learning_rate": 9.94348897700074e-05,
+ "loss": 0.5243,
+ "step": 20300
+ },
+ {
+ "epoch": 0.10499378292341903,
+ "grad_norm": 18914.298828125,
+ "learning_rate": 9.942939989637216e-05,
+ "loss": 0.5247,
+ "step": 20350
+ },
+ {
+ "epoch": 0.10525175290603185,
+ "grad_norm": 17788.77734375,
+ "learning_rate": 9.942388363828041e-05,
+ "loss": 0.5205,
+ "step": 20400
+ },
+ {
+ "epoch": 0.10550972288864467,
+ "grad_norm": 17314.578125,
+ "learning_rate": 9.941834099867659e-05,
+ "loss": 0.5182,
+ "step": 20450
+ },
+ {
+ "epoch": 0.10576769287125751,
+ "grad_norm": 18627.068359375,
+ "learning_rate": 9.941277198051931e-05,
+ "loss": 0.5208,
+ "step": 20500
+ },
+ {
+ "epoch": 0.10602566285387033,
+ "grad_norm": 18274.4609375,
+ "learning_rate": 9.940717658678113e-05,
+ "loss": 0.5244,
+ "step": 20550
+ },
+ {
+ "epoch": 0.10628363283648315,
+ "grad_norm": 18668.767578125,
+ "learning_rate": 9.940155482044884e-05,
+ "loss": 0.5237,
+ "step": 20600
+ },
+ {
+ "epoch": 0.10654160281909597,
+ "grad_norm": 17703.703125,
+ "learning_rate": 9.939590668452316e-05,
+ "loss": 0.5148,
+ "step": 20650
+ },
+ {
+ "epoch": 0.10679957280170879,
+ "grad_norm": 18372.7578125,
+ "learning_rate": 9.939023218201901e-05,
+ "loss": 0.522,
+ "step": 20700
+ },
+ {
+ "epoch": 0.10705754278432161,
+ "grad_norm": 18439.521484375,
+ "learning_rate": 9.93845313159653e-05,
+ "loss": 0.5177,
+ "step": 20750
+ },
+ {
+ "epoch": 0.10731551276693443,
+ "grad_norm": 18812.10546875,
+ "learning_rate": 9.937880408940504e-05,
+ "loss": 0.5161,
+ "step": 20800
+ },
+ {
+ "epoch": 0.10757348274954727,
+ "grad_norm": 19163.4296875,
+ "learning_rate": 9.937305050539534e-05,
+ "loss": 0.5175,
+ "step": 20850
+ },
+ {
+ "epoch": 0.10783145273216009,
+ "grad_norm": 19459.3984375,
+ "learning_rate": 9.936727056700732e-05,
+ "loss": 0.5257,
+ "step": 20900
+ },
+ {
+ "epoch": 0.10808942271477291,
+ "grad_norm": 20272.22265625,
+ "learning_rate": 9.93614642773262e-05,
+ "loss": 0.5244,
+ "step": 20950
+ },
+ {
+ "epoch": 0.10834739269738573,
+ "grad_norm": 19995.736328125,
+ "learning_rate": 9.93556316394513e-05,
+ "loss": 0.5179,
+ "step": 21000
+ },
+ {
+ "epoch": 0.10860536267999855,
+ "grad_norm": 20567.369140625,
+ "learning_rate": 9.934977265649594e-05,
+ "loss": 0.528,
+ "step": 21050
+ },
+ {
+ "epoch": 0.10886333266261138,
+ "grad_norm": 19328.57421875,
+ "learning_rate": 9.934388733158753e-05,
+ "loss": 0.5249,
+ "step": 21100
+ },
+ {
+ "epoch": 0.1091213026452242,
+ "grad_norm": 17305.19921875,
+ "learning_rate": 9.933797566786757e-05,
+ "loss": 0.5163,
+ "step": 21150
+ },
+ {
+ "epoch": 0.10937927262783702,
+ "grad_norm": 19983.99609375,
+ "learning_rate": 9.933203766849155e-05,
+ "loss": 0.5227,
+ "step": 21200
+ },
+ {
+ "epoch": 0.10963724261044985,
+ "grad_norm": 18918.16015625,
+ "learning_rate": 9.93260733366291e-05,
+ "loss": 0.521,
+ "step": 21250
+ },
+ {
+ "epoch": 0.10989521259306267,
+ "grad_norm": 19260.40625,
+ "learning_rate": 9.932008267546384e-05,
+ "loss": 0.5195,
+ "step": 21300
+ },
+ {
+ "epoch": 0.1101531825756755,
+ "grad_norm": 16713.015625,
+ "learning_rate": 9.931406568819348e-05,
+ "loss": 0.5187,
+ "step": 21350
+ },
+ {
+ "epoch": 0.11041115255828832,
+ "grad_norm": 19787.67578125,
+ "learning_rate": 9.930802237802976e-05,
+ "loss": 0.5152,
+ "step": 21400
+ },
+ {
+ "epoch": 0.11066912254090114,
+ "grad_norm": 20632.775390625,
+ "learning_rate": 9.93019527481985e-05,
+ "loss": 0.5158,
+ "step": 21450
+ },
+ {
+ "epoch": 0.11092709252351396,
+ "grad_norm": 18545.748046875,
+ "learning_rate": 9.929585680193951e-05,
+ "loss": 0.5161,
+ "step": 21500
+ },
+ {
+ "epoch": 0.11118506250612678,
+ "grad_norm": 18961.138671875,
+ "learning_rate": 9.928973454250674e-05,
+ "loss": 0.5192,
+ "step": 21550
+ },
+ {
+ "epoch": 0.11144303248873962,
+ "grad_norm": 18970.013671875,
+ "learning_rate": 9.928358597316812e-05,
+ "loss": 0.5211,
+ "step": 21600
+ },
+ {
+ "epoch": 0.11170100247135244,
+ "grad_norm": 20800.046875,
+ "learning_rate": 9.927741109720561e-05,
+ "loss": 0.5143,
+ "step": 21650
+ },
+ {
+ "epoch": 0.11195897245396526,
+ "grad_norm": 18738.564453125,
+ "learning_rate": 9.927120991791528e-05,
+ "loss": 0.5232,
+ "step": 21700
+ },
+ {
+ "epoch": 0.11221694243657808,
+ "grad_norm": 18495.798828125,
+ "learning_rate": 9.926498243860715e-05,
+ "loss": 0.5176,
+ "step": 21750
+ },
+ {
+ "epoch": 0.1124749124191909,
+ "grad_norm": 18129.375,
+ "learning_rate": 9.925872866260537e-05,
+ "loss": 0.5132,
+ "step": 21800
+ },
+ {
+ "epoch": 0.11273288240180372,
+ "grad_norm": 19332.751953125,
+ "learning_rate": 9.925244859324807e-05,
+ "loss": 0.5135,
+ "step": 21850
+ },
+ {
+ "epoch": 0.11299085238441654,
+ "grad_norm": 19395.544921875,
+ "learning_rate": 9.924614223388742e-05,
+ "loss": 0.5191,
+ "step": 21900
+ },
+ {
+ "epoch": 0.11324882236702938,
+ "grad_norm": 20292.890625,
+ "learning_rate": 9.923980958788964e-05,
+ "loss": 0.5212,
+ "step": 21950
+ },
+ {
+ "epoch": 0.1135067923496422,
+ "grad_norm": 20309.033203125,
+ "learning_rate": 9.923345065863498e-05,
+ "loss": 0.5134,
+ "step": 22000
+ },
+ {
+ "epoch": 0.11376476233225502,
+ "grad_norm": 17513.578125,
+ "learning_rate": 9.922706544951772e-05,
+ "loss": 0.5216,
+ "step": 22050
+ },
+ {
+ "epoch": 0.11402273231486784,
+ "grad_norm": 18886.10546875,
+ "learning_rate": 9.922065396394614e-05,
+ "loss": 0.5219,
+ "step": 22100
+ },
+ {
+ "epoch": 0.11428070229748066,
+ "grad_norm": 19656.1484375,
+ "learning_rate": 9.921421620534257e-05,
+ "loss": 0.5163,
+ "step": 22150
+ },
+ {
+ "epoch": 0.11453867228009348,
+ "grad_norm": 18463.068359375,
+ "learning_rate": 9.920775217714338e-05,
+ "loss": 0.5198,
+ "step": 22200
+ },
+ {
+ "epoch": 0.1147966422627063,
+ "grad_norm": 20666.400390625,
+ "learning_rate": 9.920126188279892e-05,
+ "loss": 0.5164,
+ "step": 22250
+ },
+ {
+ "epoch": 0.11505461224531914,
+ "grad_norm": 20401.681640625,
+ "learning_rate": 9.919474532577359e-05,
+ "loss": 0.5163,
+ "step": 22300
+ },
+ {
+ "epoch": 0.11531258222793196,
+ "grad_norm": 21289.541015625,
+ "learning_rate": 9.918820250954581e-05,
+ "loss": 0.5114,
+ "step": 22350
+ },
+ {
+ "epoch": 0.11557055221054478,
+ "grad_norm": 17559.50390625,
+ "learning_rate": 9.918163343760801e-05,
+ "loss": 0.5156,
+ "step": 22400
+ },
+ {
+ "epoch": 0.1158285221931576,
+ "grad_norm": 17041.087890625,
+ "learning_rate": 9.917503811346662e-05,
+ "loss": 0.5146,
+ "step": 22450
+ },
+ {
+ "epoch": 0.11608649217577043,
+ "grad_norm": 20508.087890625,
+ "learning_rate": 9.916841654064212e-05,
+ "loss": 0.5202,
+ "step": 22500
+ },
+ {
+ "epoch": 0.11634446215838325,
+ "grad_norm": 21307.646484375,
+ "learning_rate": 9.916176872266894e-05,
+ "loss": 0.5108,
+ "step": 22550
+ },
+ {
+ "epoch": 0.11660243214099607,
+ "grad_norm": 21765.580078125,
+ "learning_rate": 9.91550946630956e-05,
+ "loss": 0.5158,
+ "step": 22600
+ },
+ {
+ "epoch": 0.1168604021236089,
+ "grad_norm": 18173.646484375,
+ "learning_rate": 9.914839436548454e-05,
+ "loss": 0.5081,
+ "step": 22650
+ },
+ {
+ "epoch": 0.11711837210622172,
+ "grad_norm": 19044.880859375,
+ "learning_rate": 9.914166783341227e-05,
+ "loss": 0.5144,
+ "step": 22700
+ },
+ {
+ "epoch": 0.11737634208883455,
+ "grad_norm": 19291.37109375,
+ "learning_rate": 9.91349150704693e-05,
+ "loss": 0.5147,
+ "step": 22750
+ },
+ {
+ "epoch": 0.11763431207144737,
+ "grad_norm": 16757.376953125,
+ "learning_rate": 9.91281360802601e-05,
+ "loss": 0.5163,
+ "step": 22800
+ },
+ {
+ "epoch": 0.11789228205406019,
+ "grad_norm": 18870.287109375,
+ "learning_rate": 9.912133086640318e-05,
+ "loss": 0.512,
+ "step": 22850
+ },
+ {
+ "epoch": 0.11815025203667301,
+ "grad_norm": 20520.115234375,
+ "learning_rate": 9.911449943253102e-05,
+ "loss": 0.5175,
+ "step": 22900
+ },
+ {
+ "epoch": 0.11840822201928583,
+ "grad_norm": 20585.21484375,
+ "learning_rate": 9.910764178229011e-05,
+ "loss": 0.5114,
+ "step": 22950
+ },
+ {
+ "epoch": 0.11866619200189867,
+ "grad_norm": 18660.384765625,
+ "learning_rate": 9.910075791934092e-05,
+ "loss": 0.5115,
+ "step": 23000
+ },
+ {
+ "epoch": 0.11892416198451149,
+ "grad_norm": 19391.318359375,
+ "learning_rate": 9.909384784735794e-05,
+ "loss": 0.5198,
+ "step": 23050
+ },
+ {
+ "epoch": 0.11918213196712431,
+ "grad_norm": 18007.306640625,
+ "learning_rate": 9.908691157002962e-05,
+ "loss": 0.5125,
+ "step": 23100
+ },
+ {
+ "epoch": 0.11944010194973713,
+ "grad_norm": 20804.501953125,
+ "learning_rate": 9.907994909105842e-05,
+ "loss": 0.516,
+ "step": 23150
+ },
+ {
+ "epoch": 0.11969807193234995,
+ "grad_norm": 18307.63671875,
+ "learning_rate": 9.907296041416076e-05,
+ "loss": 0.5108,
+ "step": 23200
+ },
+ {
+ "epoch": 0.11995604191496277,
+ "grad_norm": 19694.552734375,
+ "learning_rate": 9.906594554306709e-05,
+ "loss": 0.5092,
+ "step": 23250
+ },
+ {
+ "epoch": 0.12021401189757559,
+ "grad_norm": 20234.0703125,
+ "learning_rate": 9.90589044815218e-05,
+ "loss": 0.515,
+ "step": 23300
+ },
+ {
+ "epoch": 0.12047198188018843,
+ "grad_norm": 18483.4296875,
+ "learning_rate": 9.905183723328327e-05,
+ "loss": 0.5127,
+ "step": 23350
+ },
+ {
+ "epoch": 0.12072995186280125,
+ "grad_norm": 17447.51953125,
+ "learning_rate": 9.904474380212384e-05,
+ "loss": 0.5107,
+ "step": 23400
+ },
+ {
+ "epoch": 0.12098792184541407,
+ "grad_norm": 18881.7109375,
+ "learning_rate": 9.903762419182986e-05,
+ "loss": 0.5177,
+ "step": 23450
+ },
+ {
+ "epoch": 0.12124589182802689,
+ "grad_norm": 17861.990234375,
+ "learning_rate": 9.903047840620168e-05,
+ "loss": 0.5128,
+ "step": 23500
+ },
+ {
+ "epoch": 0.12150386181063971,
+ "grad_norm": 19111.53515625,
+ "learning_rate": 9.902330644905351e-05,
+ "loss": 0.5134,
+ "step": 23550
+ },
+ {
+ "epoch": 0.12176183179325253,
+ "grad_norm": 18461.107421875,
+ "learning_rate": 9.901610832421366e-05,
+ "loss": 0.51,
+ "step": 23600
+ },
+ {
+ "epoch": 0.12201980177586536,
+ "grad_norm": 18103.701171875,
+ "learning_rate": 9.900888403552431e-05,
+ "loss": 0.5131,
+ "step": 23650
+ },
+ {
+ "epoch": 0.12227777175847819,
+ "grad_norm": 18334.755859375,
+ "learning_rate": 9.900163358684168e-05,
+ "loss": 0.511,
+ "step": 23700
+ },
+ {
+ "epoch": 0.12253574174109101,
+ "grad_norm": 17476.322265625,
+ "learning_rate": 9.89943569820359e-05,
+ "loss": 0.5151,
+ "step": 23750
+ },
+ {
+ "epoch": 0.12279371172370383,
+ "grad_norm": 18698.09765625,
+ "learning_rate": 9.898705422499107e-05,
+ "loss": 0.5146,
+ "step": 23800
+ },
+ {
+ "epoch": 0.12305168170631665,
+ "grad_norm": 18321.80859375,
+ "learning_rate": 9.897972531960528e-05,
+ "loss": 0.5109,
+ "step": 23850
+ },
+ {
+ "epoch": 0.12330965168892948,
+ "grad_norm": 18234.361328125,
+ "learning_rate": 9.897237026979056e-05,
+ "loss": 0.5115,
+ "step": 23900
+ },
+ {
+ "epoch": 0.1235676216715423,
+ "grad_norm": 19737.849609375,
+ "learning_rate": 9.896498907947287e-05,
+ "loss": 0.5155,
+ "step": 23950
+ },
+ {
+ "epoch": 0.12382559165415512,
+ "grad_norm": 19136.279296875,
+ "learning_rate": 9.895758175259218e-05,
+ "loss": 0.5162,
+ "step": 24000
+ },
+ {
+ "epoch": 0.12408356163676794,
+ "grad_norm": 18575.431640625,
+ "learning_rate": 9.895014829310235e-05,
+ "loss": 0.5141,
+ "step": 24050
+ },
+ {
+ "epoch": 0.12434153161938077,
+ "grad_norm": 17589.353515625,
+ "learning_rate": 9.894268870497121e-05,
+ "loss": 0.501,
+ "step": 24100
+ },
+ {
+ "epoch": 0.1245995016019936,
+ "grad_norm": 19781.830078125,
+ "learning_rate": 9.893520299218057e-05,
+ "loss": 0.5128,
+ "step": 24150
+ },
+ {
+ "epoch": 0.12485747158460642,
+ "grad_norm": 17501.150390625,
+ "learning_rate": 9.892769115872617e-05,
+ "loss": 0.5113,
+ "step": 24200
+ },
+ {
+ "epoch": 0.12511544156721924,
+ "grad_norm": 21107.34375,
+ "learning_rate": 9.892015320861762e-05,
+ "loss": 0.5041,
+ "step": 24250
+ },
+ {
+ "epoch": 0.12537341154983206,
+ "grad_norm": 17529.345703125,
+ "learning_rate": 9.89125891458786e-05,
+ "loss": 0.5093,
+ "step": 24300
+ },
+ {
+ "epoch": 0.12563138153244488,
+ "grad_norm": 18061.890625,
+ "learning_rate": 9.890499897454663e-05,
+ "loss": 0.5111,
+ "step": 24350
+ },
+ {
+ "epoch": 0.1258893515150577,
+ "grad_norm": 21213.177734375,
+ "learning_rate": 9.889738269867318e-05,
+ "loss": 0.5106,
+ "step": 24400
+ },
+ {
+ "epoch": 0.12614732149767052,
+ "grad_norm": 17838.625,
+ "learning_rate": 9.88897403223237e-05,
+ "loss": 0.5144,
+ "step": 24450
+ },
+ {
+ "epoch": 0.12640529148028334,
+ "grad_norm": 19047.787109375,
+ "learning_rate": 9.888207184957752e-05,
+ "loss": 0.5133,
+ "step": 24500
+ },
+ {
+ "epoch": 0.12666326146289617,
+ "grad_norm": 17355.26171875,
+ "learning_rate": 9.887437728452794e-05,
+ "loss": 0.5054,
+ "step": 24550
+ },
+ {
+ "epoch": 0.12692123144550901,
+ "grad_norm": 20496.369140625,
+ "learning_rate": 9.886665663128216e-05,
+ "loss": 0.51,
+ "step": 24600
+ },
+ {
+ "epoch": 0.12717920142812184,
+ "grad_norm": 19887.734375,
+ "learning_rate": 9.885890989396133e-05,
+ "loss": 0.5049,
+ "step": 24650
+ },
+ {
+ "epoch": 0.12743717141073466,
+ "grad_norm": 20027.69140625,
+ "learning_rate": 9.885113707670049e-05,
+ "loss": 0.5118,
+ "step": 24700
+ },
+ {
+ "epoch": 0.12769514139334748,
+ "grad_norm": 18888.92578125,
+ "learning_rate": 9.884333818364861e-05,
+ "loss": 0.5168,
+ "step": 24750
+ },
+ {
+ "epoch": 0.1279531113759603,
+ "grad_norm": 20906.673828125,
+ "learning_rate": 9.883551321896862e-05,
+ "loss": 0.5109,
+ "step": 24800
+ },
+ {
+ "epoch": 0.12821108135857312,
+ "grad_norm": 20228.833984375,
+ "learning_rate": 9.882766218683731e-05,
+ "loss": 0.5167,
+ "step": 24850
+ },
+ {
+ "epoch": 0.12846905134118594,
+ "grad_norm": 19832.4609375,
+ "learning_rate": 9.881978509144543e-05,
+ "loss": 0.5113,
+ "step": 24900
+ },
+ {
+ "epoch": 0.12872702132379876,
+ "grad_norm": 18049.193359375,
+ "learning_rate": 9.881188193699758e-05,
+ "loss": 0.5121,
+ "step": 24950
+ },
+ {
+ "epoch": 0.12898499130641158,
+ "grad_norm": 18765.033203125,
+ "learning_rate": 9.880395272771236e-05,
+ "loss": 0.5123,
+ "step": 25000
+ },
+ {
+ "epoch": 0.12898499130641158,
+ "eval_loss": 0.5013377666473389,
+ "eval_runtime": 3332.4061,
+ "eval_samples_per_second": 930.595,
+ "eval_steps_per_second": 1.818,
+ "step": 25000
+ },
+ {
+ "epoch": 0.1292429612890244,
+ "grad_norm": 18435.787109375,
+ "learning_rate": 9.879599746782221e-05,
+ "loss": 0.5096,
+ "step": 25050
+ },
+ {
+ "epoch": 0.12950093127163723,
+ "grad_norm": 18993.890625,
+ "learning_rate": 9.878801616157348e-05,
+ "loss": 0.5091,
+ "step": 25100
+ },
+ {
+ "epoch": 0.12975890125425005,
+ "grad_norm": 19766.783203125,
+ "learning_rate": 9.878000881322646e-05,
+ "loss": 0.5059,
+ "step": 25150
+ },
+ {
+ "epoch": 0.13001687123686287,
+ "grad_norm": 19316.537109375,
+ "learning_rate": 9.87719754270553e-05,
+ "loss": 0.5112,
+ "step": 25200
+ },
+ {
+ "epoch": 0.1302748412194757,
+ "grad_norm": 19288.64453125,
+ "learning_rate": 9.876391600734807e-05,
+ "loss": 0.5031,
+ "step": 25250
+ },
+ {
+ "epoch": 0.1305328112020885,
+ "grad_norm": 18962.7734375,
+ "learning_rate": 9.875583055840673e-05,
+ "loss": 0.5113,
+ "step": 25300
+ },
+ {
+ "epoch": 0.13079078118470136,
+ "grad_norm": 19399.21875,
+ "learning_rate": 9.874771908454714e-05,
+ "loss": 0.5177,
+ "step": 25350
+ },
+ {
+ "epoch": 0.13104875116731418,
+ "grad_norm": 20511.134765625,
+ "learning_rate": 9.873958159009904e-05,
+ "loss": 0.5049,
+ "step": 25400
+ },
+ {
+ "epoch": 0.131306721149927,
+ "grad_norm": 17669.00390625,
+ "learning_rate": 9.87314180794061e-05,
+ "loss": 0.5076,
+ "step": 25450
+ },
+ {
+ "epoch": 0.13156469113253982,
+ "grad_norm": 20254.75390625,
+ "learning_rate": 9.872322855682579e-05,
+ "loss": 0.5102,
+ "step": 25500
+ },
+ {
+ "epoch": 0.13182266111515265,
+ "grad_norm": 21859.880859375,
+ "learning_rate": 9.871501302672956e-05,
+ "loss": 0.5098,
+ "step": 25550
+ },
+ {
+ "epoch": 0.13208063109776547,
+ "grad_norm": 18794.90625,
+ "learning_rate": 9.870677149350268e-05,
+ "loss": 0.5078,
+ "step": 25600
+ },
+ {
+ "epoch": 0.1323386010803783,
+ "grad_norm": 19909.65625,
+ "learning_rate": 9.869850396154434e-05,
+ "loss": 0.5129,
+ "step": 25650
+ },
+ {
+ "epoch": 0.1325965710629911,
+ "grad_norm": 17887.99609375,
+ "learning_rate": 9.869021043526756e-05,
+ "loss": 0.508,
+ "step": 25700
+ },
+ {
+ "epoch": 0.13285454104560393,
+ "grad_norm": 17189.033203125,
+ "learning_rate": 9.868189091909929e-05,
+ "loss": 0.5114,
+ "step": 25750
+ },
+ {
+ "epoch": 0.13311251102821675,
+ "grad_norm": 21320.78125,
+ "learning_rate": 9.867354541748033e-05,
+ "loss": 0.5081,
+ "step": 25800
+ },
+ {
+ "epoch": 0.13337048101082957,
+ "grad_norm": 19035.33984375,
+ "learning_rate": 9.866517393486532e-05,
+ "loss": 0.5065,
+ "step": 25850
+ },
+ {
+ "epoch": 0.1336284509934424,
+ "grad_norm": 19038.876953125,
+ "learning_rate": 9.86567764757228e-05,
+ "loss": 0.5055,
+ "step": 25900
+ },
+ {
+ "epoch": 0.13388642097605521,
+ "grad_norm": 20425.6875,
+ "learning_rate": 9.86483530445352e-05,
+ "loss": 0.5091,
+ "step": 25950
+ },
+ {
+ "epoch": 0.13414439095866804,
+ "grad_norm": 19947.34765625,
+ "learning_rate": 9.863990364579876e-05,
+ "loss": 0.5062,
+ "step": 26000
+ },
+ {
+ "epoch": 0.13440236094128089,
+ "grad_norm": 18758.7890625,
+ "learning_rate": 9.863142828402361e-05,
+ "loss": 0.5099,
+ "step": 26050
+ },
+ {
+ "epoch": 0.1346603309238937,
+ "grad_norm": 18494.076171875,
+ "learning_rate": 9.862292696373372e-05,
+ "loss": 0.5043,
+ "step": 26100
+ },
+ {
+ "epoch": 0.13491830090650653,
+ "grad_norm": 19646.841796875,
+ "learning_rate": 9.861439968946696e-05,
+ "loss": 0.508,
+ "step": 26150
+ },
+ {
+ "epoch": 0.13517627088911935,
+ "grad_norm": 19356.009765625,
+ "learning_rate": 9.8605846465775e-05,
+ "loss": 0.5015,
+ "step": 26200
+ },
+ {
+ "epoch": 0.13543424087173217,
+ "grad_norm": 19243.1875,
+ "learning_rate": 9.859726729722341e-05,
+ "loss": 0.5086,
+ "step": 26250
+ },
+ {
+ "epoch": 0.135692210854345,
+ "grad_norm": 20116.43359375,
+ "learning_rate": 9.858866218839156e-05,
+ "loss": 0.5074,
+ "step": 26300
+ },
+ {
+ "epoch": 0.1359501808369578,
+ "grad_norm": 18592.1015625,
+ "learning_rate": 9.858003114387269e-05,
+ "loss": 0.5054,
+ "step": 26350
+ },
+ {
+ "epoch": 0.13620815081957063,
+ "grad_norm": 19552.505859375,
+ "learning_rate": 9.85713741682739e-05,
+ "loss": 0.5042,
+ "step": 26400
+ },
+ {
+ "epoch": 0.13646612080218345,
+ "grad_norm": 18818.142578125,
+ "learning_rate": 9.856269126621611e-05,
+ "loss": 0.5106,
+ "step": 26450
+ },
+ {
+ "epoch": 0.13672409078479628,
+ "grad_norm": 21973.685546875,
+ "learning_rate": 9.855398244233407e-05,
+ "loss": 0.5116,
+ "step": 26500
+ },
+ {
+ "epoch": 0.1369820607674091,
+ "grad_norm": 19296.7890625,
+ "learning_rate": 9.854524770127641e-05,
+ "loss": 0.5103,
+ "step": 26550
+ },
+ {
+ "epoch": 0.13724003075002192,
+ "grad_norm": 18975.22265625,
+ "learning_rate": 9.853648704770554e-05,
+ "loss": 0.5093,
+ "step": 26600
+ },
+ {
+ "epoch": 0.13749800073263474,
+ "grad_norm": 20003.19140625,
+ "learning_rate": 9.852770048629776e-05,
+ "loss": 0.5094,
+ "step": 26650
+ },
+ {
+ "epoch": 0.13775597071524756,
+ "grad_norm": 19885.341796875,
+ "learning_rate": 9.851888802174312e-05,
+ "loss": 0.502,
+ "step": 26700
+ },
+ {
+ "epoch": 0.1380139406978604,
+ "grad_norm": 18030.115234375,
+ "learning_rate": 9.851004965874557e-05,
+ "loss": 0.5045,
+ "step": 26750
+ },
+ {
+ "epoch": 0.13827191068047323,
+ "grad_norm": 19143.369140625,
+ "learning_rate": 9.850118540202286e-05,
+ "loss": 0.5068,
+ "step": 26800
+ },
+ {
+ "epoch": 0.13852988066308605,
+ "grad_norm": 18902.5390625,
+ "learning_rate": 9.849229525630656e-05,
+ "loss": 0.4984,
+ "step": 26850
+ },
+ {
+ "epoch": 0.13878785064569887,
+ "grad_norm": 18523.115234375,
+ "learning_rate": 9.848337922634206e-05,
+ "loss": 0.5099,
+ "step": 26900
+ },
+ {
+ "epoch": 0.1390458206283117,
+ "grad_norm": 19873.283203125,
+ "learning_rate": 9.847443731688852e-05,
+ "loss": 0.5039,
+ "step": 26950
+ },
+ {
+ "epoch": 0.13930379061092452,
+ "grad_norm": 20202.23046875,
+ "learning_rate": 9.846546953271902e-05,
+ "loss": 0.507,
+ "step": 27000
+ },
+ {
+ "epoch": 0.13956176059353734,
+ "grad_norm": 17484.572265625,
+ "learning_rate": 9.845647587862034e-05,
+ "loss": 0.5113,
+ "step": 27050
+ },
+ {
+ "epoch": 0.13981973057615016,
+ "grad_norm": 17931.634765625,
+ "learning_rate": 9.844745635939316e-05,
+ "loss": 0.5051,
+ "step": 27100
+ },
+ {
+ "epoch": 0.14007770055876298,
+ "grad_norm": 20536.693359375,
+ "learning_rate": 9.843841097985191e-05,
+ "loss": 0.5044,
+ "step": 27150
+ },
+ {
+ "epoch": 0.1403356705413758,
+ "grad_norm": 18379.619140625,
+ "learning_rate": 9.842933974482482e-05,
+ "loss": 0.5071,
+ "step": 27200
+ },
+ {
+ "epoch": 0.14059364052398862,
+ "grad_norm": 19097.240234375,
+ "learning_rate": 9.842024265915397e-05,
+ "loss": 0.5046,
+ "step": 27250
+ },
+ {
+ "epoch": 0.14085161050660144,
+ "grad_norm": 22569.80859375,
+ "learning_rate": 9.841111972769517e-05,
+ "loss": 0.5022,
+ "step": 27300
+ },
+ {
+ "epoch": 0.14110958048921426,
+ "grad_norm": 17499.166015625,
+ "learning_rate": 9.84019709553181e-05,
+ "loss": 0.5014,
+ "step": 27350
+ },
+ {
+ "epoch": 0.14136755047182709,
+ "grad_norm": 20447.25,
+ "learning_rate": 9.839279634690619e-05,
+ "loss": 0.5065,
+ "step": 27400
+ },
+ {
+ "epoch": 0.14162552045443993,
+ "grad_norm": 20977.70703125,
+ "learning_rate": 9.838359590735665e-05,
+ "loss": 0.5042,
+ "step": 27450
+ },
+ {
+ "epoch": 0.14188349043705276,
+ "grad_norm": 18168.962890625,
+ "learning_rate": 9.83743696415805e-05,
+ "loss": 0.5043,
+ "step": 27500
+ },
+ {
+ "epoch": 0.14214146041966558,
+ "grad_norm": 18671.841796875,
+ "learning_rate": 9.836511755450256e-05,
+ "loss": 0.5054,
+ "step": 27550
+ },
+ {
+ "epoch": 0.1423994304022784,
+ "grad_norm": 17737.90625,
+ "learning_rate": 9.835583965106141e-05,
+ "loss": 0.507,
+ "step": 27600
+ },
+ {
+ "epoch": 0.14265740038489122,
+ "grad_norm": 23218.873046875,
+ "learning_rate": 9.834653593620939e-05,
+ "loss": 0.5055,
+ "step": 27650
+ },
+ {
+ "epoch": 0.14291537036750404,
+ "grad_norm": 20013.341796875,
+ "learning_rate": 9.833720641491269e-05,
+ "loss": 0.5008,
+ "step": 27700
+ },
+ {
+ "epoch": 0.14317334035011686,
+ "grad_norm": 21755.08203125,
+ "learning_rate": 9.832785109215119e-05,
+ "loss": 0.5029,
+ "step": 27750
+ },
+ {
+ "epoch": 0.14343131033272968,
+ "grad_norm": 18450.541015625,
+ "learning_rate": 9.831846997291859e-05,
+ "loss": 0.5086,
+ "step": 27800
+ },
+ {
+ "epoch": 0.1436892803153425,
+ "grad_norm": 17578.990234375,
+ "learning_rate": 9.830906306222235e-05,
+ "loss": 0.498,
+ "step": 27850
+ },
+ {
+ "epoch": 0.14394725029795533,
+ "grad_norm": 18771.2578125,
+ "learning_rate": 9.82996303650837e-05,
+ "loss": 0.5006,
+ "step": 27900
+ },
+ {
+ "epoch": 0.14420522028056815,
+ "grad_norm": 19841.912109375,
+ "learning_rate": 9.829017188653763e-05,
+ "loss": 0.5003,
+ "step": 27950
+ },
+ {
+ "epoch": 0.14446319026318097,
+ "grad_norm": 19089.384765625,
+ "learning_rate": 9.82806876316329e-05,
+ "loss": 0.5028,
+ "step": 28000
+ },
+ {
+ "epoch": 0.1447211602457938,
+ "grad_norm": 17971.998046875,
+ "learning_rate": 9.827117760543198e-05,
+ "loss": 0.5103,
+ "step": 28050
+ },
+ {
+ "epoch": 0.1449791302284066,
+ "grad_norm": 19590.46875,
+ "learning_rate": 9.826164181301121e-05,
+ "loss": 0.5075,
+ "step": 28100
+ },
+ {
+ "epoch": 0.14523710021101943,
+ "grad_norm": 19316.150390625,
+ "learning_rate": 9.825208025946056e-05,
+ "loss": 0.4971,
+ "step": 28150
+ },
+ {
+ "epoch": 0.14549507019363228,
+ "grad_norm": 19814.3125,
+ "learning_rate": 9.82424929498838e-05,
+ "loss": 0.501,
+ "step": 28200
+ },
+ {
+ "epoch": 0.1457530401762451,
+ "grad_norm": 18669.203125,
+ "learning_rate": 9.823287988939847e-05,
+ "loss": 0.5027,
+ "step": 28250
+ },
+ {
+ "epoch": 0.14601101015885792,
+ "grad_norm": 20375.48828125,
+ "learning_rate": 9.822324108313585e-05,
+ "loss": 0.4966,
+ "step": 28300
+ },
+ {
+ "epoch": 0.14626898014147074,
+ "grad_norm": 19665.4296875,
+ "learning_rate": 9.82135765362409e-05,
+ "loss": 0.4966,
+ "step": 28350
+ },
+ {
+ "epoch": 0.14652695012408357,
+ "grad_norm": 19579.771484375,
+ "learning_rate": 9.820388625387242e-05,
+ "loss": 0.5028,
+ "step": 28400
+ },
+ {
+ "epoch": 0.1467849201066964,
+ "grad_norm": 20270.564453125,
+ "learning_rate": 9.819417024120285e-05,
+ "loss": 0.4972,
+ "step": 28450
+ },
+ {
+ "epoch": 0.1470428900893092,
+ "grad_norm": 20025.6328125,
+ "learning_rate": 9.818442850341845e-05,
+ "loss": 0.5082,
+ "step": 28500
+ },
+ {
+ "epoch": 0.14730086007192203,
+ "grad_norm": 19062.525390625,
+ "learning_rate": 9.817466104571915e-05,
+ "loss": 0.4983,
+ "step": 28550
+ },
+ {
+ "epoch": 0.14755883005453485,
+ "grad_norm": 18558.0390625,
+ "learning_rate": 9.816486787331862e-05,
+ "loss": 0.5004,
+ "step": 28600
+ },
+ {
+ "epoch": 0.14781680003714767,
+ "grad_norm": 20880.6875,
+ "learning_rate": 9.815504899144428e-05,
+ "loss": 0.5036,
+ "step": 28650
+ },
+ {
+ "epoch": 0.1480747700197605,
+ "grad_norm": 19120.3359375,
+ "learning_rate": 9.814520440533726e-05,
+ "loss": 0.5004,
+ "step": 28700
+ },
+ {
+ "epoch": 0.14833274000237331,
+ "grad_norm": 17185.451171875,
+ "learning_rate": 9.813533412025242e-05,
+ "loss": 0.5047,
+ "step": 28750
+ },
+ {
+ "epoch": 0.14859070998498614,
+ "grad_norm": 21795.697265625,
+ "learning_rate": 9.81254381414583e-05,
+ "loss": 0.5033,
+ "step": 28800
+ },
+ {
+ "epoch": 0.14884867996759896,
+ "grad_norm": 21923.44140625,
+ "learning_rate": 9.811551647423718e-05,
+ "loss": 0.4957,
+ "step": 28850
+ },
+ {
+ "epoch": 0.1491066499502118,
+ "grad_norm": 18988.30078125,
+ "learning_rate": 9.810556912388509e-05,
+ "loss": 0.4979,
+ "step": 28900
+ },
+ {
+ "epoch": 0.14936461993282463,
+ "grad_norm": 18479.74609375,
+ "learning_rate": 9.809559609571169e-05,
+ "loss": 0.5003,
+ "step": 28950
+ },
+ {
+ "epoch": 0.14962258991543745,
+ "grad_norm": 20426.57421875,
+ "learning_rate": 9.808559739504043e-05,
+ "loss": 0.5019,
+ "step": 29000
+ },
+ {
+ "epoch": 0.14988055989805027,
+ "grad_norm": 20044.365234375,
+ "learning_rate": 9.80755730272084e-05,
+ "loss": 0.5012,
+ "step": 29050
+ },
+ {
+ "epoch": 0.1501385298806631,
+ "grad_norm": 18321.439453125,
+ "learning_rate": 9.806552299756641e-05,
+ "loss": 0.4918,
+ "step": 29100
+ },
+ {
+ "epoch": 0.1503964998632759,
+ "grad_norm": 20315.681640625,
+ "learning_rate": 9.805544731147899e-05,
+ "loss": 0.5015,
+ "step": 29150
+ },
+ {
+ "epoch": 0.15065446984588873,
+ "grad_norm": 20399.990234375,
+ "learning_rate": 9.804534597432432e-05,
+ "loss": 0.4967,
+ "step": 29200
+ },
+ {
+ "epoch": 0.15091243982850155,
+ "grad_norm": 19539.91796875,
+ "learning_rate": 9.803521899149432e-05,
+ "loss": 0.5002,
+ "step": 29250
+ },
+ {
+ "epoch": 0.15117040981111438,
+ "grad_norm": 20317.970703125,
+ "learning_rate": 9.802506636839457e-05,
+ "loss": 0.4988,
+ "step": 29300
+ },
+ {
+ "epoch": 0.1514283797937272,
+ "grad_norm": 18728.32421875,
+ "learning_rate": 9.801488811044434e-05,
+ "loss": 0.5007,
+ "step": 29350
+ },
+ {
+ "epoch": 0.15168634977634002,
+ "grad_norm": 21256.51171875,
+ "learning_rate": 9.80046842230766e-05,
+ "loss": 0.5066,
+ "step": 29400
+ },
+ {
+ "epoch": 0.15194431975895284,
+ "grad_norm": 18871.8828125,
+ "learning_rate": 9.799445471173799e-05,
+ "loss": 0.502,
+ "step": 29450
+ },
+ {
+ "epoch": 0.15220228974156566,
+ "grad_norm": 18434.251953125,
+ "learning_rate": 9.798419958188878e-05,
+ "loss": 0.5018,
+ "step": 29500
+ },
+ {
+ "epoch": 0.15246025972417848,
+ "grad_norm": 18562.412109375,
+ "learning_rate": 9.7973918839003e-05,
+ "loss": 0.4978,
+ "step": 29550
+ },
+ {
+ "epoch": 0.15271822970679133,
+ "grad_norm": 20020.7890625,
+ "learning_rate": 9.796361248856832e-05,
+ "loss": 0.4989,
+ "step": 29600
+ },
+ {
+ "epoch": 0.15297619968940415,
+ "grad_norm": 20026.6015625,
+ "learning_rate": 9.795328053608606e-05,
+ "loss": 0.5002,
+ "step": 29650
+ },
+ {
+ "epoch": 0.15323416967201697,
+ "grad_norm": 20098.703125,
+ "learning_rate": 9.794292298707119e-05,
+ "loss": 0.4938,
+ "step": 29700
+ },
+ {
+ "epoch": 0.1534921396546298,
+ "grad_norm": 18960.154296875,
+ "learning_rate": 9.793253984705239e-05,
+ "loss": 0.4956,
+ "step": 29750
+ },
+ {
+ "epoch": 0.15375010963724262,
+ "grad_norm": 20478.669921875,
+ "learning_rate": 9.7922131121572e-05,
+ "loss": 0.4998,
+ "step": 29800
+ },
+ {
+ "epoch": 0.15400807961985544,
+ "grad_norm": 20406.701171875,
+ "learning_rate": 9.791169681618596e-05,
+ "loss": 0.5083,
+ "step": 29850
+ },
+ {
+ "epoch": 0.15426604960246826,
+ "grad_norm": 17598.75390625,
+ "learning_rate": 9.790123693646391e-05,
+ "loss": 0.4968,
+ "step": 29900
+ },
+ {
+ "epoch": 0.15452401958508108,
+ "grad_norm": 19622.521484375,
+ "learning_rate": 9.789075148798915e-05,
+ "loss": 0.4881,
+ "step": 29950
+ },
+ {
+ "epoch": 0.1547819895676939,
+ "grad_norm": 20092.87109375,
+ "learning_rate": 9.78802404763586e-05,
+ "loss": 0.4994,
+ "step": 30000
+ },
+ {
+ "epoch": 0.1547819895676939,
+ "eval_loss": 0.4904574453830719,
+ "eval_runtime": 3267.2095,
+ "eval_samples_per_second": 949.165,
+ "eval_steps_per_second": 1.854,
+ "step": 30000
+ },
+ {
+ "epoch": 0.15503995955030672,
+ "grad_norm": 19136.84765625,
+ "learning_rate": 9.786970390718282e-05,
+ "loss": 0.4963,
+ "step": 30050
+ },
+ {
+ "epoch": 0.15529792953291954,
+ "grad_norm": 20464.998046875,
+ "learning_rate": 9.785914178608603e-05,
+ "loss": 0.4994,
+ "step": 30100
+ },
+ {
+ "epoch": 0.15555589951553236,
+ "grad_norm": 23388.55859375,
+ "learning_rate": 9.784855411870611e-05,
+ "loss": 0.5036,
+ "step": 30150
+ },
+ {
+ "epoch": 0.15581386949814519,
+ "grad_norm": 20002.095703125,
+ "learning_rate": 9.783794091069451e-05,
+ "loss": 0.5021,
+ "step": 30200
+ },
+ {
+ "epoch": 0.156071839480758,
+ "grad_norm": 19565.419921875,
+ "learning_rate": 9.782730216771641e-05,
+ "loss": 0.4929,
+ "step": 30250
+ },
+ {
+ "epoch": 0.15632980946337086,
+ "grad_norm": 20284.173828125,
+ "learning_rate": 9.781663789545052e-05,
+ "loss": 0.4889,
+ "step": 30300
+ },
+ {
+ "epoch": 0.15658777944598368,
+ "grad_norm": 18613.439453125,
+ "learning_rate": 9.780594809958922e-05,
+ "loss": 0.496,
+ "step": 30350
+ },
+ {
+ "epoch": 0.1568457494285965,
+ "grad_norm": 19419.1640625,
+ "learning_rate": 9.779523278583855e-05,
+ "loss": 0.4977,
+ "step": 30400
+ },
+ {
+ "epoch": 0.15710371941120932,
+ "grad_norm": 21695.361328125,
+ "learning_rate": 9.778449195991813e-05,
+ "loss": 0.4998,
+ "step": 30450
+ },
+ {
+ "epoch": 0.15736168939382214,
+ "grad_norm": 21914.3828125,
+ "learning_rate": 9.777372562756117e-05,
+ "loss": 0.4936,
+ "step": 30500
+ },
+ {
+ "epoch": 0.15761965937643496,
+ "grad_norm": 22384.525390625,
+ "learning_rate": 9.776293379451458e-05,
+ "loss": 0.5034,
+ "step": 30550
+ },
+ {
+ "epoch": 0.15787762935904778,
+ "grad_norm": 21174.220703125,
+ "learning_rate": 9.775211646653879e-05,
+ "loss": 0.4928,
+ "step": 30600
+ },
+ {
+ "epoch": 0.1581355993416606,
+ "grad_norm": 19809.953125,
+ "learning_rate": 9.77412736494079e-05,
+ "loss": 0.5014,
+ "step": 30650
+ },
+ {
+ "epoch": 0.15839356932427343,
+ "grad_norm": 19657.048828125,
+ "learning_rate": 9.773040534890958e-05,
+ "loss": 0.5022,
+ "step": 30700
+ },
+ {
+ "epoch": 0.15865153930688625,
+ "grad_norm": 20559.490234375,
+ "learning_rate": 9.771951157084514e-05,
+ "loss": 0.4923,
+ "step": 30750
+ },
+ {
+ "epoch": 0.15890950928949907,
+ "grad_norm": 19473.294921875,
+ "learning_rate": 9.770859232102946e-05,
+ "loss": 0.4991,
+ "step": 30800
+ },
+ {
+ "epoch": 0.1591674792721119,
+ "grad_norm": 19243.509765625,
+ "learning_rate": 9.769764760529102e-05,
+ "loss": 0.4934,
+ "step": 30850
+ },
+ {
+ "epoch": 0.1594254492547247,
+ "grad_norm": 20882.853515625,
+ "learning_rate": 9.768667742947189e-05,
+ "loss": 0.4989,
+ "step": 30900
+ },
+ {
+ "epoch": 0.15968341923733753,
+ "grad_norm": 19654.17578125,
+ "learning_rate": 9.767568179942776e-05,
+ "loss": 0.501,
+ "step": 30950
+ },
+ {
+ "epoch": 0.15994138921995035,
+ "grad_norm": 20069.412109375,
+ "learning_rate": 9.766466072102786e-05,
+ "loss": 0.5001,
+ "step": 31000
+ },
+ {
+ "epoch": 0.1601993592025632,
+ "grad_norm": 19730.416015625,
+ "learning_rate": 9.765361420015506e-05,
+ "loss": 0.4947,
+ "step": 31050
+ },
+ {
+ "epoch": 0.16045732918517602,
+ "grad_norm": 19825.43359375,
+ "learning_rate": 9.764254224270573e-05,
+ "loss": 0.5012,
+ "step": 31100
+ },
+ {
+ "epoch": 0.16071529916778884,
+ "grad_norm": 19111.859375,
+ "learning_rate": 9.763144485458992e-05,
+ "loss": 0.4946,
+ "step": 31150
+ },
+ {
+ "epoch": 0.16097326915040167,
+ "grad_norm": 20071.7578125,
+ "learning_rate": 9.762032204173116e-05,
+ "loss": 0.4961,
+ "step": 31200
+ },
+ {
+ "epoch": 0.1612312391330145,
+ "grad_norm": 18780.638671875,
+ "learning_rate": 9.76091738100666e-05,
+ "loss": 0.4952,
+ "step": 31250
+ },
+ {
+ "epoch": 0.1614892091156273,
+ "grad_norm": 20192.69140625,
+ "learning_rate": 9.759800016554699e-05,
+ "loss": 0.4919,
+ "step": 31300
+ },
+ {
+ "epoch": 0.16174717909824013,
+ "grad_norm": 18430.57421875,
+ "learning_rate": 9.758680111413653e-05,
+ "loss": 0.4953,
+ "step": 31350
+ },
+ {
+ "epoch": 0.16200514908085295,
+ "grad_norm": 18921.740234375,
+ "learning_rate": 9.757557666181314e-05,
+ "loss": 0.5013,
+ "step": 31400
+ },
+ {
+ "epoch": 0.16226311906346577,
+ "grad_norm": 18918.857421875,
+ "learning_rate": 9.756432681456815e-05,
+ "loss": 0.4976,
+ "step": 31450
+ },
+ {
+ "epoch": 0.1625210890460786,
+ "grad_norm": 21373.814453125,
+ "learning_rate": 9.755305157840655e-05,
+ "loss": 0.4975,
+ "step": 31500
+ },
+ {
+ "epoch": 0.16277905902869141,
+ "grad_norm": 19509.482421875,
+ "learning_rate": 9.754175095934684e-05,
+ "loss": 0.4966,
+ "step": 31550
+ },
+ {
+ "epoch": 0.16303702901130424,
+ "grad_norm": 18362.125,
+ "learning_rate": 9.753042496342103e-05,
+ "loss": 0.505,
+ "step": 31600
+ },
+ {
+ "epoch": 0.16329499899391706,
+ "grad_norm": 20344.11328125,
+ "learning_rate": 9.751907359667476e-05,
+ "loss": 0.4988,
+ "step": 31650
+ },
+ {
+ "epoch": 0.16355296897652988,
+ "grad_norm": 21398.97265625,
+ "learning_rate": 9.750769686516715e-05,
+ "loss": 0.493,
+ "step": 31700
+ },
+ {
+ "epoch": 0.16381093895914273,
+ "grad_norm": 21106.955078125,
+ "learning_rate": 9.74962947749709e-05,
+ "loss": 0.4999,
+ "step": 31750
+ },
+ {
+ "epoch": 0.16406890894175555,
+ "grad_norm": 19787.216796875,
+ "learning_rate": 9.74848673321722e-05,
+ "loss": 0.4932,
+ "step": 31800
+ },
+ {
+ "epoch": 0.16432687892436837,
+ "grad_norm": 19198.83984375,
+ "learning_rate": 9.747341454287082e-05,
+ "loss": 0.4919,
+ "step": 31850
+ },
+ {
+ "epoch": 0.1645848489069812,
+ "grad_norm": 18460.92578125,
+ "learning_rate": 9.746193641318002e-05,
+ "loss": 0.495,
+ "step": 31900
+ },
+ {
+ "epoch": 0.164842818889594,
+ "grad_norm": 18591.427734375,
+ "learning_rate": 9.74504329492266e-05,
+ "loss": 0.4888,
+ "step": 31950
+ },
+ {
+ "epoch": 0.16510078887220683,
+ "grad_norm": 21651.3515625,
+ "learning_rate": 9.743890415715091e-05,
+ "loss": 0.4909,
+ "step": 32000
+ },
+ {
+ "epoch": 0.16535875885481965,
+ "grad_norm": 18884.486328125,
+ "learning_rate": 9.742735004310677e-05,
+ "loss": 0.4981,
+ "step": 32050
+ },
+ {
+ "epoch": 0.16561672883743248,
+ "grad_norm": 19223.658203125,
+ "learning_rate": 9.741577061326157e-05,
+ "loss": 0.4961,
+ "step": 32100
+ },
+ {
+ "epoch": 0.1658746988200453,
+ "grad_norm": 18266.560546875,
+ "learning_rate": 9.740416587379615e-05,
+ "loss": 0.4914,
+ "step": 32150
+ },
+ {
+ "epoch": 0.16613266880265812,
+ "grad_norm": 19871.509765625,
+ "learning_rate": 9.739253583090493e-05,
+ "loss": 0.499,
+ "step": 32200
+ },
+ {
+ "epoch": 0.16639063878527094,
+ "grad_norm": 19524.298828125,
+ "learning_rate": 9.738088049079577e-05,
+ "loss": 0.4944,
+ "step": 32250
+ },
+ {
+ "epoch": 0.16664860876788376,
+ "grad_norm": 20308.685546875,
+ "learning_rate": 9.73691998596901e-05,
+ "loss": 0.4941,
+ "step": 32300
+ },
+ {
+ "epoch": 0.16690657875049658,
+ "grad_norm": 19125.52734375,
+ "learning_rate": 9.735749394382278e-05,
+ "loss": 0.4968,
+ "step": 32350
+ },
+ {
+ "epoch": 0.1671645487331094,
+ "grad_norm": 18792.716796875,
+ "learning_rate": 9.734576274944223e-05,
+ "loss": 0.4959,
+ "step": 32400
+ },
+ {
+ "epoch": 0.16742251871572225,
+ "grad_norm": 18521.54296875,
+ "learning_rate": 9.73340062828103e-05,
+ "loss": 0.4913,
+ "step": 32450
+ },
+ {
+ "epoch": 0.16768048869833507,
+ "grad_norm": 19540.41796875,
+ "learning_rate": 9.732222455020241e-05,
+ "loss": 0.4999,
+ "step": 32500
+ },
+ {
+ "epoch": 0.1679384586809479,
+ "grad_norm": 18682.84375,
+ "learning_rate": 9.73104175579074e-05,
+ "loss": 0.4991,
+ "step": 32550
+ },
+ {
+ "epoch": 0.16819642866356072,
+ "grad_norm": 20134.8984375,
+ "learning_rate": 9.72985853122276e-05,
+ "loss": 0.4839,
+ "step": 32600
+ },
+ {
+ "epoch": 0.16845439864617354,
+ "grad_norm": 20375.1484375,
+ "learning_rate": 9.728672781947883e-05,
+ "loss": 0.4941,
+ "step": 32650
+ },
+ {
+ "epoch": 0.16871236862878636,
+ "grad_norm": 19720.98046875,
+ "learning_rate": 9.727484508599042e-05,
+ "loss": 0.4856,
+ "step": 32700
+ },
+ {
+ "epoch": 0.16897033861139918,
+ "grad_norm": 19408.7734375,
+ "learning_rate": 9.726293711810513e-05,
+ "loss": 0.4942,
+ "step": 32750
+ },
+ {
+ "epoch": 0.169228308594012,
+ "grad_norm": 20136.892578125,
+ "learning_rate": 9.725100392217919e-05,
+ "loss": 0.4942,
+ "step": 32800
+ },
+ {
+ "epoch": 0.16948627857662482,
+ "grad_norm": 20555.27734375,
+ "learning_rate": 9.723904550458232e-05,
+ "loss": 0.4907,
+ "step": 32850
+ },
+ {
+ "epoch": 0.16974424855923764,
+ "grad_norm": 18876.787109375,
+ "learning_rate": 9.722706187169769e-05,
+ "loss": 0.4951,
+ "step": 32900
+ },
+ {
+ "epoch": 0.17000221854185046,
+ "grad_norm": 19918.4296875,
+ "learning_rate": 9.721505302992194e-05,
+ "loss": 0.4871,
+ "step": 32950
+ },
+ {
+ "epoch": 0.17026018852446329,
+ "grad_norm": 18593.453125,
+ "learning_rate": 9.720301898566513e-05,
+ "loss": 0.4889,
+ "step": 33000
+ },
+ {
+ "epoch": 0.1705181585070761,
+ "grad_norm": 21007.5625,
+ "learning_rate": 9.719095974535084e-05,
+ "loss": 0.4936,
+ "step": 33050
+ },
+ {
+ "epoch": 0.17077612848968893,
+ "grad_norm": 21749.849609375,
+ "learning_rate": 9.717887531541601e-05,
+ "loss": 0.4915,
+ "step": 33100
+ },
+ {
+ "epoch": 0.17103409847230178,
+ "grad_norm": 19097.896484375,
+ "learning_rate": 9.716676570231114e-05,
+ "loss": 0.4857,
+ "step": 33150
+ },
+ {
+ "epoch": 0.1712920684549146,
+ "grad_norm": 18509.107421875,
+ "learning_rate": 9.715463091250003e-05,
+ "loss": 0.487,
+ "step": 33200
+ },
+ {
+ "epoch": 0.17155003843752742,
+ "grad_norm": 21414.916015625,
+ "learning_rate": 9.714247095246007e-05,
+ "loss": 0.4929,
+ "step": 33250
+ },
+ {
+ "epoch": 0.17180800842014024,
+ "grad_norm": 19836.978515625,
+ "learning_rate": 9.713028582868196e-05,
+ "loss": 0.4948,
+ "step": 33300
+ },
+ {
+ "epoch": 0.17206597840275306,
+ "grad_norm": 18013.787109375,
+ "learning_rate": 9.71180755476699e-05,
+ "loss": 0.4945,
+ "step": 33350
+ },
+ {
+ "epoch": 0.17232394838536588,
+ "grad_norm": 18498.1640625,
+ "learning_rate": 9.71058401159415e-05,
+ "loss": 0.4961,
+ "step": 33400
+ },
+ {
+ "epoch": 0.1725819183679787,
+ "grad_norm": 19871.404296875,
+ "learning_rate": 9.709357954002778e-05,
+ "loss": 0.4896,
+ "step": 33450
+ },
+ {
+ "epoch": 0.17283988835059153,
+ "grad_norm": 20794.05859375,
+ "learning_rate": 9.708129382647324e-05,
+ "loss": 0.4855,
+ "step": 33500
+ },
+ {
+ "epoch": 0.17309785833320435,
+ "grad_norm": 19775.6328125,
+ "learning_rate": 9.706898298183573e-05,
+ "loss": 0.4899,
+ "step": 33550
+ },
+ {
+ "epoch": 0.17335582831581717,
+ "grad_norm": 24329.740234375,
+ "learning_rate": 9.705664701268652e-05,
+ "loss": 0.4879,
+ "step": 33600
+ },
+ {
+ "epoch": 0.17361379829843,
+ "grad_norm": 19666.697265625,
+ "learning_rate": 9.704428592561037e-05,
+ "loss": 0.493,
+ "step": 33650
+ },
+ {
+ "epoch": 0.1738717682810428,
+ "grad_norm": 20382.115234375,
+ "learning_rate": 9.703189972720532e-05,
+ "loss": 0.4922,
+ "step": 33700
+ },
+ {
+ "epoch": 0.17412973826365563,
+ "grad_norm": 20240.46875,
+ "learning_rate": 9.701948842408293e-05,
+ "loss": 0.4908,
+ "step": 33750
+ },
+ {
+ "epoch": 0.17438770824626845,
+ "grad_norm": 18531.224609375,
+ "learning_rate": 9.700705202286811e-05,
+ "loss": 0.489,
+ "step": 33800
+ },
+ {
+ "epoch": 0.17464567822888127,
+ "grad_norm": 19121.0625,
+ "learning_rate": 9.699459053019912e-05,
+ "loss": 0.4884,
+ "step": 33850
+ },
+ {
+ "epoch": 0.17490364821149412,
+ "grad_norm": 19959.931640625,
+ "learning_rate": 9.698210395272773e-05,
+ "loss": 0.4912,
+ "step": 33900
+ },
+ {
+ "epoch": 0.17516161819410694,
+ "grad_norm": 18255.732421875,
+ "learning_rate": 9.696959229711901e-05,
+ "loss": 0.4888,
+ "step": 33950
+ },
+ {
+ "epoch": 0.17541958817671977,
+ "grad_norm": 21808.8671875,
+ "learning_rate": 9.695705557005142e-05,
+ "loss": 0.4945,
+ "step": 34000
+ },
+ {
+ "epoch": 0.1756775581593326,
+ "grad_norm": 18687.521484375,
+ "learning_rate": 9.694449377821685e-05,
+ "loss": 0.4891,
+ "step": 34050
+ },
+ {
+ "epoch": 0.1759355281419454,
+ "grad_norm": 18309.859375,
+ "learning_rate": 9.693190692832053e-05,
+ "loss": 0.4888,
+ "step": 34100
+ },
+ {
+ "epoch": 0.17619349812455823,
+ "grad_norm": 19453.705078125,
+ "learning_rate": 9.691929502708106e-05,
+ "loss": 0.4852,
+ "step": 34150
+ },
+ {
+ "epoch": 0.17645146810717105,
+ "grad_norm": 20964.595703125,
+ "learning_rate": 9.690665808123046e-05,
+ "loss": 0.4931,
+ "step": 34200
+ },
+ {
+ "epoch": 0.17670943808978387,
+ "grad_norm": 20170.5703125,
+ "learning_rate": 9.689399609751405e-05,
+ "loss": 0.4908,
+ "step": 34250
+ },
+ {
+ "epoch": 0.1769674080723967,
+ "grad_norm": 18276.19140625,
+ "learning_rate": 9.688130908269058e-05,
+ "loss": 0.4906,
+ "step": 34300
+ },
+ {
+ "epoch": 0.1772253780550095,
+ "grad_norm": 21062.56640625,
+ "learning_rate": 9.686859704353212e-05,
+ "loss": 0.4911,
+ "step": 34350
+ },
+ {
+ "epoch": 0.17748334803762233,
+ "grad_norm": 21678.6796875,
+ "learning_rate": 9.685585998682414e-05,
+ "loss": 0.4894,
+ "step": 34400
+ },
+ {
+ "epoch": 0.17774131802023516,
+ "grad_norm": 17795.384765625,
+ "learning_rate": 9.684309791936539e-05,
+ "loss": 0.4893,
+ "step": 34450
+ },
+ {
+ "epoch": 0.17799928800284798,
+ "grad_norm": 21536.837890625,
+ "learning_rate": 9.683031084796803e-05,
+ "loss": 0.4889,
+ "step": 34500
+ },
+ {
+ "epoch": 0.1782572579854608,
+ "grad_norm": 20554.423828125,
+ "learning_rate": 9.681749877945756e-05,
+ "loss": 0.4843,
+ "step": 34550
+ },
+ {
+ "epoch": 0.17851522796807365,
+ "grad_norm": 22045.376953125,
+ "learning_rate": 9.680466172067282e-05,
+ "loss": 0.4895,
+ "step": 34600
+ },
+ {
+ "epoch": 0.17877319795068647,
+ "grad_norm": 21406.853515625,
+ "learning_rate": 9.679179967846597e-05,
+ "loss": 0.4914,
+ "step": 34650
+ },
+ {
+ "epoch": 0.1790311679332993,
+ "grad_norm": 20971.037109375,
+ "learning_rate": 9.677891265970252e-05,
+ "loss": 0.485,
+ "step": 34700
+ },
+ {
+ "epoch": 0.1792891379159121,
+ "grad_norm": 20256.73828125,
+ "learning_rate": 9.676600067126129e-05,
+ "loss": 0.4918,
+ "step": 34750
+ },
+ {
+ "epoch": 0.17954710789852493,
+ "grad_norm": 19123.048828125,
+ "learning_rate": 9.67530637200345e-05,
+ "loss": 0.49,
+ "step": 34800
+ },
+ {
+ "epoch": 0.17980507788113775,
+ "grad_norm": 20799.748046875,
+ "learning_rate": 9.674010181292761e-05,
+ "loss": 0.4889,
+ "step": 34850
+ },
+ {
+ "epoch": 0.18006304786375057,
+ "grad_norm": 19569.609375,
+ "learning_rate": 9.672711495685945e-05,
+ "loss": 0.4882,
+ "step": 34900
+ },
+ {
+ "epoch": 0.1803210178463634,
+ "grad_norm": 18339.76171875,
+ "learning_rate": 9.671410315876213e-05,
+ "loss": 0.4884,
+ "step": 34950
+ },
+ {
+ "epoch": 0.18057898782897622,
+ "grad_norm": 20066.099609375,
+ "learning_rate": 9.670106642558111e-05,
+ "loss": 0.4866,
+ "step": 35000
+ },
+ {
+ "epoch": 0.18057898782897622,
+ "eval_loss": 0.48020538687705994,
+ "eval_runtime": 3265.3619,
+ "eval_samples_per_second": 949.702,
+ "eval_steps_per_second": 1.855,
+ "step": 35000
+ },
+ {
+ "epoch": 0.18083695781158904,
+ "grad_norm": 18703.037109375,
+ "learning_rate": 9.668800476427515e-05,
+ "loss": 0.4953,
+ "step": 35050
+ },
+ {
+ "epoch": 0.18109492779420186,
+ "grad_norm": 19886.177734375,
+ "learning_rate": 9.667491818181631e-05,
+ "loss": 0.4845,
+ "step": 35100
+ },
+ {
+ "epoch": 0.18135289777681468,
+ "grad_norm": 19349.08203125,
+ "learning_rate": 9.666180668518993e-05,
+ "loss": 0.493,
+ "step": 35150
+ },
+ {
+ "epoch": 0.1816108677594275,
+ "grad_norm": 19786.404296875,
+ "learning_rate": 9.664867028139473e-05,
+ "loss": 0.4815,
+ "step": 35200
+ },
+ {
+ "epoch": 0.18186883774204032,
+ "grad_norm": 21271.05859375,
+ "learning_rate": 9.66355089774426e-05,
+ "loss": 0.4907,
+ "step": 35250
+ },
+ {
+ "epoch": 0.18212680772465317,
+ "grad_norm": 19096.3125,
+ "learning_rate": 9.662232278035885e-05,
+ "loss": 0.4865,
+ "step": 35300
+ },
+ {
+ "epoch": 0.182384777707266,
+ "grad_norm": 20136.935546875,
+ "learning_rate": 9.660911169718196e-05,
+ "loss": 0.4824,
+ "step": 35350
+ },
+ {
+ "epoch": 0.18264274768987881,
+ "grad_norm": 19532.361328125,
+ "learning_rate": 9.65958757349638e-05,
+ "loss": 0.4857,
+ "step": 35400
+ },
+ {
+ "epoch": 0.18290071767249164,
+ "grad_norm": 18227.626953125,
+ "learning_rate": 9.658261490076944e-05,
+ "loss": 0.4871,
+ "step": 35450
+ },
+ {
+ "epoch": 0.18315868765510446,
+ "grad_norm": 21021.564453125,
+ "learning_rate": 9.656932920167727e-05,
+ "loss": 0.485,
+ "step": 35500
+ },
+ {
+ "epoch": 0.18341665763771728,
+ "grad_norm": 19943.9765625,
+ "learning_rate": 9.655601864477893e-05,
+ "loss": 0.4908,
+ "step": 35550
+ },
+ {
+ "epoch": 0.1836746276203301,
+ "grad_norm": 19356.8203125,
+ "learning_rate": 9.654268323717934e-05,
+ "loss": 0.4849,
+ "step": 35600
+ },
+ {
+ "epoch": 0.18393259760294292,
+ "grad_norm": 19431.9453125,
+ "learning_rate": 9.652932298599671e-05,
+ "loss": 0.4927,
+ "step": 35650
+ },
+ {
+ "epoch": 0.18419056758555574,
+ "grad_norm": 18860.0625,
+ "learning_rate": 9.651593789836242e-05,
+ "loss": 0.4879,
+ "step": 35700
+ },
+ {
+ "epoch": 0.18444853756816856,
+ "grad_norm": 18524.46875,
+ "learning_rate": 9.650252798142123e-05,
+ "loss": 0.4877,
+ "step": 35750
+ },
+ {
+ "epoch": 0.18470650755078138,
+ "grad_norm": 18897.322265625,
+ "learning_rate": 9.648909324233107e-05,
+ "loss": 0.4906,
+ "step": 35800
+ },
+ {
+ "epoch": 0.1849644775333942,
+ "grad_norm": 21080.552734375,
+ "learning_rate": 9.647563368826313e-05,
+ "loss": 0.4895,
+ "step": 35850
+ },
+ {
+ "epoch": 0.18522244751600703,
+ "grad_norm": 20014.828125,
+ "learning_rate": 9.64621493264019e-05,
+ "loss": 0.4816,
+ "step": 35900
+ },
+ {
+ "epoch": 0.18548041749861985,
+ "grad_norm": 19470.3984375,
+ "learning_rate": 9.644864016394504e-05,
+ "loss": 0.4812,
+ "step": 35950
+ },
+ {
+ "epoch": 0.1857383874812327,
+ "grad_norm": 21915.400390625,
+ "learning_rate": 9.643510620810348e-05,
+ "loss": 0.4859,
+ "step": 36000
+ },
+ {
+ "epoch": 0.18599635746384552,
+ "grad_norm": 19367.009765625,
+ "learning_rate": 9.642154746610139e-05,
+ "loss": 0.4905,
+ "step": 36050
+ },
+ {
+ "epoch": 0.18625432744645834,
+ "grad_norm": 18379.70703125,
+ "learning_rate": 9.640796394517616e-05,
+ "loss": 0.4878,
+ "step": 36100
+ },
+ {
+ "epoch": 0.18651229742907116,
+ "grad_norm": 18933.455078125,
+ "learning_rate": 9.639435565257842e-05,
+ "loss": 0.4877,
+ "step": 36150
+ },
+ {
+ "epoch": 0.18677026741168398,
+ "grad_norm": 19026.484375,
+ "learning_rate": 9.638072259557201e-05,
+ "loss": 0.4873,
+ "step": 36200
+ },
+ {
+ "epoch": 0.1870282373942968,
+ "grad_norm": 21111.09375,
+ "learning_rate": 9.636706478143398e-05,
+ "loss": 0.4815,
+ "step": 36250
+ },
+ {
+ "epoch": 0.18728620737690962,
+ "grad_norm": 19362.541015625,
+ "learning_rate": 9.635338221745462e-05,
+ "loss": 0.4854,
+ "step": 36300
+ },
+ {
+ "epoch": 0.18754417735952245,
+ "grad_norm": 19861.58984375,
+ "learning_rate": 9.63396749109374e-05,
+ "loss": 0.4832,
+ "step": 36350
+ },
+ {
+ "epoch": 0.18780214734213527,
+ "grad_norm": 18793.623046875,
+ "learning_rate": 9.632594286919905e-05,
+ "loss": 0.4811,
+ "step": 36400
+ },
+ {
+ "epoch": 0.1880601173247481,
+ "grad_norm": 20452.26953125,
+ "learning_rate": 9.631218609956943e-05,
+ "loss": 0.4872,
+ "step": 36450
+ },
+ {
+ "epoch": 0.1883180873073609,
+ "grad_norm": 19237.203125,
+ "learning_rate": 9.629840460939165e-05,
+ "loss": 0.4941,
+ "step": 36500
+ },
+ {
+ "epoch": 0.18857605728997373,
+ "grad_norm": 19828.84765625,
+ "learning_rate": 9.628459840602202e-05,
+ "loss": 0.4869,
+ "step": 36550
+ },
+ {
+ "epoch": 0.18883402727258655,
+ "grad_norm": 18171.08203125,
+ "learning_rate": 9.627076749683e-05,
+ "loss": 0.4915,
+ "step": 36600
+ },
+ {
+ "epoch": 0.18909199725519937,
+ "grad_norm": 21346.9375,
+ "learning_rate": 9.625691188919827e-05,
+ "loss": 0.4913,
+ "step": 36650
+ },
+ {
+ "epoch": 0.1893499672378122,
+ "grad_norm": 20066.7890625,
+ "learning_rate": 9.62430315905227e-05,
+ "loss": 0.4809,
+ "step": 36700
+ },
+ {
+ "epoch": 0.18960793722042504,
+ "grad_norm": 20736.546875,
+ "learning_rate": 9.622912660821231e-05,
+ "loss": 0.4849,
+ "step": 36750
+ },
+ {
+ "epoch": 0.18986590720303786,
+ "grad_norm": 20891.958984375,
+ "learning_rate": 9.62151969496893e-05,
+ "loss": 0.4831,
+ "step": 36800
+ },
+ {
+ "epoch": 0.19012387718565069,
+ "grad_norm": 21394.1953125,
+ "learning_rate": 9.620124262238908e-05,
+ "loss": 0.4855,
+ "step": 36850
+ },
+ {
+ "epoch": 0.1903818471682635,
+ "grad_norm": 19725.89453125,
+ "learning_rate": 9.618726363376016e-05,
+ "loss": 0.48,
+ "step": 36900
+ },
+ {
+ "epoch": 0.19063981715087633,
+ "grad_norm": 21622.78125,
+ "learning_rate": 9.617325999126429e-05,
+ "loss": 0.4832,
+ "step": 36950
+ },
+ {
+ "epoch": 0.19089778713348915,
+ "grad_norm": 22529.548828125,
+ "learning_rate": 9.615923170237633e-05,
+ "loss": 0.4852,
+ "step": 37000
+ },
+ {
+ "epoch": 0.19115575711610197,
+ "grad_norm": 21136.404296875,
+ "learning_rate": 9.614517877458428e-05,
+ "loss": 0.4816,
+ "step": 37050
+ },
+ {
+ "epoch": 0.1914137270987148,
+ "grad_norm": 19039.330078125,
+ "learning_rate": 9.61311012153894e-05,
+ "loss": 0.4835,
+ "step": 37100
+ },
+ {
+ "epoch": 0.1916716970813276,
+ "grad_norm": 19755.974609375,
+ "learning_rate": 9.611699903230594e-05,
+ "loss": 0.4846,
+ "step": 37150
+ },
+ {
+ "epoch": 0.19192966706394043,
+ "grad_norm": 19061.28515625,
+ "learning_rate": 9.610287223286139e-05,
+ "loss": 0.4816,
+ "step": 37200
+ },
+ {
+ "epoch": 0.19218763704655326,
+ "grad_norm": 21649.275390625,
+ "learning_rate": 9.608872082459639e-05,
+ "loss": 0.4837,
+ "step": 37250
+ },
+ {
+ "epoch": 0.19244560702916608,
+ "grad_norm": 19856.759765625,
+ "learning_rate": 9.607454481506466e-05,
+ "loss": 0.4848,
+ "step": 37300
+ },
+ {
+ "epoch": 0.1927035770117789,
+ "grad_norm": 19442.810546875,
+ "learning_rate": 9.60603442118331e-05,
+ "loss": 0.4828,
+ "step": 37350
+ },
+ {
+ "epoch": 0.19296154699439172,
+ "grad_norm": 20076.44140625,
+ "learning_rate": 9.604611902248168e-05,
+ "loss": 0.4896,
+ "step": 37400
+ },
+ {
+ "epoch": 0.19321951697700457,
+ "grad_norm": 18413.908203125,
+ "learning_rate": 9.603186925460359e-05,
+ "loss": 0.4806,
+ "step": 37450
+ },
+ {
+ "epoch": 0.1934774869596174,
+ "grad_norm": 19618.3984375,
+ "learning_rate": 9.601759491580503e-05,
+ "loss": 0.4864,
+ "step": 37500
+ },
+ {
+ "epoch": 0.1937354569422302,
+ "grad_norm": 20347.177734375,
+ "learning_rate": 9.600329601370539e-05,
+ "loss": 0.489,
+ "step": 37550
+ },
+ {
+ "epoch": 0.19399342692484303,
+ "grad_norm": 19288.380859375,
+ "learning_rate": 9.598897255593713e-05,
+ "loss": 0.4829,
+ "step": 37600
+ },
+ {
+ "epoch": 0.19425139690745585,
+ "grad_norm": 20326.1484375,
+ "learning_rate": 9.597462455014585e-05,
+ "loss": 0.4856,
+ "step": 37650
+ },
+ {
+ "epoch": 0.19450936689006867,
+ "grad_norm": 19598.14453125,
+ "learning_rate": 9.596025200399024e-05,
+ "loss": 0.4831,
+ "step": 37700
+ },
+ {
+ "epoch": 0.1947673368726815,
+ "grad_norm": 20041.28125,
+ "learning_rate": 9.594585492514205e-05,
+ "loss": 0.4822,
+ "step": 37750
+ },
+ {
+ "epoch": 0.19502530685529432,
+ "grad_norm": 20853.201171875,
+ "learning_rate": 9.593143332128623e-05,
+ "loss": 0.4874,
+ "step": 37800
+ },
+ {
+ "epoch": 0.19528327683790714,
+ "grad_norm": 21364.455078125,
+ "learning_rate": 9.591698720012068e-05,
+ "loss": 0.482,
+ "step": 37850
+ },
+ {
+ "epoch": 0.19554124682051996,
+ "grad_norm": 18795.447265625,
+ "learning_rate": 9.590251656935652e-05,
+ "loss": 0.489,
+ "step": 37900
+ },
+ {
+ "epoch": 0.19579921680313278,
+ "grad_norm": 23039.455078125,
+ "learning_rate": 9.588802143671784e-05,
+ "loss": 0.4879,
+ "step": 37950
+ },
+ {
+ "epoch": 0.1960571867857456,
+ "grad_norm": 19842.263671875,
+ "learning_rate": 9.58735018099419e-05,
+ "loss": 0.4869,
+ "step": 38000
+ },
+ {
+ "epoch": 0.19631515676835842,
+ "grad_norm": 21241.00390625,
+ "learning_rate": 9.585895769677897e-05,
+ "loss": 0.4746,
+ "step": 38050
+ },
+ {
+ "epoch": 0.19657312675097124,
+ "grad_norm": 19803.2265625,
+ "learning_rate": 9.584438910499245e-05,
+ "loss": 0.4824,
+ "step": 38100
+ },
+ {
+ "epoch": 0.1968310967335841,
+ "grad_norm": 18873.744140625,
+ "learning_rate": 9.582979604235873e-05,
+ "loss": 0.4817,
+ "step": 38150
+ },
+ {
+ "epoch": 0.19708906671619691,
+ "grad_norm": 19128.8828125,
+ "learning_rate": 9.581517851666734e-05,
+ "loss": 0.482,
+ "step": 38200
+ },
+ {
+ "epoch": 0.19734703669880974,
+ "grad_norm": 20514.16796875,
+ "learning_rate": 9.580053653572081e-05,
+ "loss": 0.4781,
+ "step": 38250
+ },
+ {
+ "epoch": 0.19760500668142256,
+ "grad_norm": 19135.58984375,
+ "learning_rate": 9.578587010733475e-05,
+ "loss": 0.4815,
+ "step": 38300
+ },
+ {
+ "epoch": 0.19786297666403538,
+ "grad_norm": 22849.197265625,
+ "learning_rate": 9.577117923933782e-05,
+ "loss": 0.4794,
+ "step": 38350
+ },
+ {
+ "epoch": 0.1981209466466482,
+ "grad_norm": 21278.736328125,
+ "learning_rate": 9.575646393957173e-05,
+ "loss": 0.4832,
+ "step": 38400
+ },
+ {
+ "epoch": 0.19837891662926102,
+ "grad_norm": 19292.162109375,
+ "learning_rate": 9.57417242158912e-05,
+ "loss": 0.4876,
+ "step": 38450
+ },
+ {
+ "epoch": 0.19863688661187384,
+ "grad_norm": 17778.423828125,
+ "learning_rate": 9.572696007616402e-05,
+ "loss": 0.4842,
+ "step": 38500
+ },
+ {
+ "epoch": 0.19889485659448666,
+ "grad_norm": 18855.140625,
+ "learning_rate": 9.5712171528271e-05,
+ "loss": 0.4846,
+ "step": 38550
+ },
+ {
+ "epoch": 0.19915282657709948,
+ "grad_norm": 21640.8203125,
+ "learning_rate": 9.5697358580106e-05,
+ "loss": 0.4829,
+ "step": 38600
+ },
+ {
+ "epoch": 0.1994107965597123,
+ "grad_norm": 19358.3828125,
+ "learning_rate": 9.568252123957586e-05,
+ "loss": 0.4806,
+ "step": 38650
+ },
+ {
+ "epoch": 0.19966876654232513,
+ "grad_norm": 20781.98828125,
+ "learning_rate": 9.566765951460046e-05,
+ "loss": 0.4849,
+ "step": 38700
+ },
+ {
+ "epoch": 0.19992673652493795,
+ "grad_norm": 20604.7265625,
+ "learning_rate": 9.565277341311271e-05,
+ "loss": 0.4856,
+ "step": 38750
+ },
+ {
+ "epoch": 0.20018470650755077,
+ "grad_norm": 20930.048828125,
+ "learning_rate": 9.563786294305854e-05,
+ "loss": 0.4812,
+ "step": 38800
+ },
+ {
+ "epoch": 0.20044267649016362,
+ "grad_norm": 22721.259765625,
+ "learning_rate": 9.562292811239686e-05,
+ "loss": 0.4857,
+ "step": 38850
+ },
+ {
+ "epoch": 0.20070064647277644,
+ "grad_norm": 19667.57421875,
+ "learning_rate": 9.560796892909957e-05,
+ "loss": 0.483,
+ "step": 38900
+ },
+ {
+ "epoch": 0.20095861645538926,
+ "grad_norm": 18259.19140625,
+ "learning_rate": 9.559298540115164e-05,
+ "loss": 0.4851,
+ "step": 38950
+ },
+ {
+ "epoch": 0.20121658643800208,
+ "grad_norm": 20980.18359375,
+ "learning_rate": 9.557797753655096e-05,
+ "loss": 0.4815,
+ "step": 39000
+ },
+ {
+ "epoch": 0.2014745564206149,
+ "grad_norm": 19840.025390625,
+ "learning_rate": 9.556294534330841e-05,
+ "loss": 0.4878,
+ "step": 39050
+ },
+ {
+ "epoch": 0.20173252640322772,
+ "grad_norm": 20406.69921875,
+ "learning_rate": 9.554788882944792e-05,
+ "loss": 0.481,
+ "step": 39100
+ },
+ {
+ "epoch": 0.20199049638584055,
+ "grad_norm": 19177.447265625,
+ "learning_rate": 9.553280800300637e-05,
+ "loss": 0.4857,
+ "step": 39150
+ },
+ {
+ "epoch": 0.20224846636845337,
+ "grad_norm": 21242.21875,
+ "learning_rate": 9.551770287203359e-05,
+ "loss": 0.4889,
+ "step": 39200
+ },
+ {
+ "epoch": 0.2025064363510662,
+ "grad_norm": 19343.58203125,
+ "learning_rate": 9.550257344459241e-05,
+ "loss": 0.482,
+ "step": 39250
+ },
+ {
+ "epoch": 0.202764406333679,
+ "grad_norm": 21327.587890625,
+ "learning_rate": 9.548741972875863e-05,
+ "loss": 0.4802,
+ "step": 39300
+ },
+ {
+ "epoch": 0.20302237631629183,
+ "grad_norm": 21366.98828125,
+ "learning_rate": 9.547224173262102e-05,
+ "loss": 0.4779,
+ "step": 39350
+ },
+ {
+ "epoch": 0.20328034629890465,
+ "grad_norm": 20876.39453125,
+ "learning_rate": 9.545703946428128e-05,
+ "loss": 0.4843,
+ "step": 39400
+ },
+ {
+ "epoch": 0.20353831628151747,
+ "grad_norm": 21280.873046875,
+ "learning_rate": 9.544181293185413e-05,
+ "loss": 0.4805,
+ "step": 39450
+ },
+ {
+ "epoch": 0.2037962862641303,
+ "grad_norm": 19546.134765625,
+ "learning_rate": 9.542656214346713e-05,
+ "loss": 0.4753,
+ "step": 39500
+ },
+ {
+ "epoch": 0.20405425624674312,
+ "grad_norm": 19179.05859375,
+ "learning_rate": 9.541128710726091e-05,
+ "loss": 0.4812,
+ "step": 39550
+ },
+ {
+ "epoch": 0.20431222622935596,
+ "grad_norm": 23525.50390625,
+ "learning_rate": 9.539598783138897e-05,
+ "loss": 0.4843,
+ "step": 39600
+ },
+ {
+ "epoch": 0.20457019621196879,
+ "grad_norm": 19369.103515625,
+ "learning_rate": 9.538066432401775e-05,
+ "loss": 0.4788,
+ "step": 39650
+ },
+ {
+ "epoch": 0.2048281661945816,
+ "grad_norm": 20777.119140625,
+ "learning_rate": 9.536531659332667e-05,
+ "loss": 0.4779,
+ "step": 39700
+ },
+ {
+ "epoch": 0.20508613617719443,
+ "grad_norm": 18987.701171875,
+ "learning_rate": 9.534994464750806e-05,
+ "loss": 0.4807,
+ "step": 39750
+ },
+ {
+ "epoch": 0.20534410615980725,
+ "grad_norm": 19523.873046875,
+ "learning_rate": 9.533454849476712e-05,
+ "loss": 0.4798,
+ "step": 39800
+ },
+ {
+ "epoch": 0.20560207614242007,
+ "grad_norm": 21302.05859375,
+ "learning_rate": 9.531912814332206e-05,
+ "loss": 0.4811,
+ "step": 39850
+ },
+ {
+ "epoch": 0.2058600461250329,
+ "grad_norm": 21545.626953125,
+ "learning_rate": 9.530368360140394e-05,
+ "loss": 0.4814,
+ "step": 39900
+ },
+ {
+ "epoch": 0.2061180161076457,
+ "grad_norm": 22709.7265625,
+ "learning_rate": 9.528821487725678e-05,
+ "loss": 0.4827,
+ "step": 39950
+ },
+ {
+ "epoch": 0.20637598609025853,
+ "grad_norm": 20853.228515625,
+ "learning_rate": 9.527272197913746e-05,
+ "loss": 0.4838,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20637598609025853,
+ "eval_loss": 0.47092095017433167,
+ "eval_runtime": 3339.7722,
+ "eval_samples_per_second": 928.542,
+ "eval_steps_per_second": 1.814,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20663395607287136,
+ "grad_norm": 18389.748046875,
+ "learning_rate": 9.525720491531581e-05,
+ "loss": 0.4809,
+ "step": 40050
+ },
+ {
+ "epoch": 0.20689192605548418,
+ "grad_norm": 20328.59765625,
+ "learning_rate": 9.524166369407453e-05,
+ "loss": 0.4827,
+ "step": 40100
+ },
+ {
+ "epoch": 0.207149896038097,
+ "grad_norm": 21094.966796875,
+ "learning_rate": 9.522609832370924e-05,
+ "loss": 0.484,
+ "step": 40150
+ },
+ {
+ "epoch": 0.20740786602070982,
+ "grad_norm": 22630.64453125,
+ "learning_rate": 9.52105088125284e-05,
+ "loss": 0.4829,
+ "step": 40200
+ },
+ {
+ "epoch": 0.20766583600332264,
+ "grad_norm": 19477.7265625,
+ "learning_rate": 9.51948951688534e-05,
+ "loss": 0.4793,
+ "step": 40250
+ },
+ {
+ "epoch": 0.2079238059859355,
+ "grad_norm": 20242.53125,
+ "learning_rate": 9.517925740101851e-05,
+ "loss": 0.4797,
+ "step": 40300
+ },
+ {
+ "epoch": 0.2081817759685483,
+ "grad_norm": 19952.421875,
+ "learning_rate": 9.516359551737087e-05,
+ "loss": 0.4785,
+ "step": 40350
+ },
+ {
+ "epoch": 0.20843974595116113,
+ "grad_norm": 19216.220703125,
+ "learning_rate": 9.514790952627049e-05,
+ "loss": 0.4753,
+ "step": 40400
+ },
+ {
+ "epoch": 0.20869771593377395,
+ "grad_norm": 20297.515625,
+ "learning_rate": 9.513219943609024e-05,
+ "loss": 0.4792,
+ "step": 40450
+ },
+ {
+ "epoch": 0.20895568591638677,
+ "grad_norm": 19528.7890625,
+ "learning_rate": 9.511646525521585e-05,
+ "loss": 0.4801,
+ "step": 40500
+ },
+ {
+ "epoch": 0.2092136558989996,
+ "grad_norm": 18037.7890625,
+ "learning_rate": 9.510070699204597e-05,
+ "loss": 0.483,
+ "step": 40550
+ },
+ {
+ "epoch": 0.20947162588161242,
+ "grad_norm": 20636.4296875,
+ "learning_rate": 9.508492465499199e-05,
+ "loss": 0.4761,
+ "step": 40600
+ },
+ {
+ "epoch": 0.20972959586422524,
+ "grad_norm": 20096.857421875,
+ "learning_rate": 9.506911825247827e-05,
+ "loss": 0.4804,
+ "step": 40650
+ },
+ {
+ "epoch": 0.20998756584683806,
+ "grad_norm": 20855.619140625,
+ "learning_rate": 9.505328779294192e-05,
+ "loss": 0.4823,
+ "step": 40700
+ },
+ {
+ "epoch": 0.21024553582945088,
+ "grad_norm": 19640.521484375,
+ "learning_rate": 9.503743328483296e-05,
+ "loss": 0.4818,
+ "step": 40750
+ },
+ {
+ "epoch": 0.2105035058120637,
+ "grad_norm": 20990.525390625,
+ "learning_rate": 9.50215547366142e-05,
+ "loss": 0.4804,
+ "step": 40800
+ },
+ {
+ "epoch": 0.21076147579467652,
+ "grad_norm": 18773.564453125,
+ "learning_rate": 9.500565215676132e-05,
+ "loss": 0.4798,
+ "step": 40850
+ },
+ {
+ "epoch": 0.21101944577728934,
+ "grad_norm": 18688.7265625,
+ "learning_rate": 9.498972555376282e-05,
+ "loss": 0.4773,
+ "step": 40900
+ },
+ {
+ "epoch": 0.21127741575990217,
+ "grad_norm": 22649.3671875,
+ "learning_rate": 9.497377493611998e-05,
+ "loss": 0.478,
+ "step": 40950
+ },
+ {
+ "epoch": 0.21153538574251501,
+ "grad_norm": 19575.95703125,
+ "learning_rate": 9.495780031234694e-05,
+ "loss": 0.4809,
+ "step": 41000
+ },
+ {
+ "epoch": 0.21179335572512784,
+ "grad_norm": 18587.681640625,
+ "learning_rate": 9.494180169097067e-05,
+ "loss": 0.4805,
+ "step": 41050
+ },
+ {
+ "epoch": 0.21205132570774066,
+ "grad_norm": 19466.5703125,
+ "learning_rate": 9.492577908053089e-05,
+ "loss": 0.4772,
+ "step": 41100
+ },
+ {
+ "epoch": 0.21230929569035348,
+ "grad_norm": 21085.15234375,
+ "learning_rate": 9.490973248958018e-05,
+ "loss": 0.4787,
+ "step": 41150
+ },
+ {
+ "epoch": 0.2125672656729663,
+ "grad_norm": 21866.95703125,
+ "learning_rate": 9.489366192668388e-05,
+ "loss": 0.4803,
+ "step": 41200
+ },
+ {
+ "epoch": 0.21282523565557912,
+ "grad_norm": 20759.609375,
+ "learning_rate": 9.487756740042015e-05,
+ "loss": 0.4782,
+ "step": 41250
+ },
+ {
+ "epoch": 0.21308320563819194,
+ "grad_norm": 20565.51171875,
+ "learning_rate": 9.486144891937997e-05,
+ "loss": 0.4765,
+ "step": 41300
+ },
+ {
+ "epoch": 0.21334117562080476,
+ "grad_norm": 21536.017578125,
+ "learning_rate": 9.484530649216705e-05,
+ "loss": 0.4753,
+ "step": 41350
+ },
+ {
+ "epoch": 0.21359914560341758,
+ "grad_norm": 19452.001953125,
+ "learning_rate": 9.482914012739788e-05,
+ "loss": 0.4807,
+ "step": 41400
+ },
+ {
+ "epoch": 0.2138571155860304,
+ "grad_norm": 21220.927734375,
+ "learning_rate": 9.481294983370179e-05,
+ "loss": 0.4803,
+ "step": 41450
+ },
+ {
+ "epoch": 0.21411508556864323,
+ "grad_norm": 18278.884765625,
+ "learning_rate": 9.479673561972082e-05,
+ "loss": 0.4807,
+ "step": 41500
+ },
+ {
+ "epoch": 0.21437305555125605,
+ "grad_norm": 21568.13671875,
+ "learning_rate": 9.478049749410983e-05,
+ "loss": 0.4751,
+ "step": 41550
+ },
+ {
+ "epoch": 0.21463102553386887,
+ "grad_norm": 21004.734375,
+ "learning_rate": 9.47642354655364e-05,
+ "loss": 0.4828,
+ "step": 41600
+ },
+ {
+ "epoch": 0.2148889955164817,
+ "grad_norm": 20709.193359375,
+ "learning_rate": 9.474794954268089e-05,
+ "loss": 0.477,
+ "step": 41650
+ },
+ {
+ "epoch": 0.21514696549909454,
+ "grad_norm": 21408.3671875,
+ "learning_rate": 9.47316397342364e-05,
+ "loss": 0.4783,
+ "step": 41700
+ },
+ {
+ "epoch": 0.21540493548170736,
+ "grad_norm": 18606.6328125,
+ "learning_rate": 9.47153060489088e-05,
+ "loss": 0.4771,
+ "step": 41750
+ },
+ {
+ "epoch": 0.21566290546432018,
+ "grad_norm": 19498.20703125,
+ "learning_rate": 9.469894849541667e-05,
+ "loss": 0.4782,
+ "step": 41800
+ },
+ {
+ "epoch": 0.215920875446933,
+ "grad_norm": 20441.9765625,
+ "learning_rate": 9.46825670824914e-05,
+ "loss": 0.4769,
+ "step": 41850
+ },
+ {
+ "epoch": 0.21617884542954582,
+ "grad_norm": 20925.109375,
+ "learning_rate": 9.466616181887704e-05,
+ "loss": 0.4858,
+ "step": 41900
+ },
+ {
+ "epoch": 0.21643681541215865,
+ "grad_norm": 21410.38671875,
+ "learning_rate": 9.464973271333042e-05,
+ "loss": 0.4791,
+ "step": 41950
+ },
+ {
+ "epoch": 0.21669478539477147,
+ "grad_norm": 19169.583984375,
+ "learning_rate": 9.463327977462106e-05,
+ "loss": 0.4783,
+ "step": 42000
+ },
+ {
+ "epoch": 0.2169527553773843,
+ "grad_norm": 19487.3359375,
+ "learning_rate": 9.461680301153124e-05,
+ "loss": 0.4792,
+ "step": 42050
+ },
+ {
+ "epoch": 0.2172107253599971,
+ "grad_norm": 21303.861328125,
+ "learning_rate": 9.460030243285592e-05,
+ "loss": 0.4811,
+ "step": 42100
+ },
+ {
+ "epoch": 0.21746869534260993,
+ "grad_norm": 21529.490234375,
+ "learning_rate": 9.458377804740279e-05,
+ "loss": 0.4761,
+ "step": 42150
+ },
+ {
+ "epoch": 0.21772666532522275,
+ "grad_norm": 21356.505859375,
+ "learning_rate": 9.456722986399227e-05,
+ "loss": 0.477,
+ "step": 42200
+ },
+ {
+ "epoch": 0.21798463530783557,
+ "grad_norm": 19551.33203125,
+ "learning_rate": 9.455065789145742e-05,
+ "loss": 0.4777,
+ "step": 42250
+ },
+ {
+ "epoch": 0.2182426052904484,
+ "grad_norm": 21424.58984375,
+ "learning_rate": 9.453406213864408e-05,
+ "loss": 0.4759,
+ "step": 42300
+ },
+ {
+ "epoch": 0.21850057527306121,
+ "grad_norm": 18835.1953125,
+ "learning_rate": 9.451744261441072e-05,
+ "loss": 0.4749,
+ "step": 42350
+ },
+ {
+ "epoch": 0.21875854525567404,
+ "grad_norm": 20333.490234375,
+ "learning_rate": 9.450079932762852e-05,
+ "loss": 0.4786,
+ "step": 42400
+ },
+ {
+ "epoch": 0.21901651523828689,
+ "grad_norm": 18957.232421875,
+ "learning_rate": 9.448413228718134e-05,
+ "loss": 0.4778,
+ "step": 42450
+ },
+ {
+ "epoch": 0.2192744852208997,
+ "grad_norm": 20251.939453125,
+ "learning_rate": 9.446744150196574e-05,
+ "loss": 0.4759,
+ "step": 42500
+ },
+ {
+ "epoch": 0.21953245520351253,
+ "grad_norm": 20740.82421875,
+ "learning_rate": 9.445072698089091e-05,
+ "loss": 0.4782,
+ "step": 42550
+ },
+ {
+ "epoch": 0.21979042518612535,
+ "grad_norm": 19501.91015625,
+ "learning_rate": 9.443398873287877e-05,
+ "loss": 0.479,
+ "step": 42600
+ },
+ {
+ "epoch": 0.22004839516873817,
+ "grad_norm": 20895.58984375,
+ "learning_rate": 9.441722676686386e-05,
+ "loss": 0.4754,
+ "step": 42650
+ },
+ {
+ "epoch": 0.220306365151351,
+ "grad_norm": 19932.66796875,
+ "learning_rate": 9.440044109179338e-05,
+ "loss": 0.4778,
+ "step": 42700
+ },
+ {
+ "epoch": 0.2205643351339638,
+ "grad_norm": 20158.693359375,
+ "learning_rate": 9.438363171662722e-05,
+ "loss": 0.4755,
+ "step": 42750
+ },
+ {
+ "epoch": 0.22082230511657663,
+ "grad_norm": 19128.953125,
+ "learning_rate": 9.436679865033789e-05,
+ "loss": 0.4744,
+ "step": 42800
+ },
+ {
+ "epoch": 0.22108027509918945,
+ "grad_norm": 19743.517578125,
+ "learning_rate": 9.434994190191054e-05,
+ "loss": 0.4781,
+ "step": 42850
+ },
+ {
+ "epoch": 0.22133824508180228,
+ "grad_norm": 17826.703125,
+ "learning_rate": 9.4333061480343e-05,
+ "loss": 0.4762,
+ "step": 42900
+ },
+ {
+ "epoch": 0.2215962150644151,
+ "grad_norm": 20606.48046875,
+ "learning_rate": 9.43161573946457e-05,
+ "loss": 0.4741,
+ "step": 42950
+ },
+ {
+ "epoch": 0.22185418504702792,
+ "grad_norm": 20116.66796875,
+ "learning_rate": 9.429922965384172e-05,
+ "loss": 0.4766,
+ "step": 43000
+ },
+ {
+ "epoch": 0.22211215502964074,
+ "grad_norm": 20560.970703125,
+ "learning_rate": 9.428227826696674e-05,
+ "loss": 0.481,
+ "step": 43050
+ },
+ {
+ "epoch": 0.22237012501225356,
+ "grad_norm": 20832.01953125,
+ "learning_rate": 9.42653032430691e-05,
+ "loss": 0.4806,
+ "step": 43100
+ },
+ {
+ "epoch": 0.2226280949948664,
+ "grad_norm": 18686.953125,
+ "learning_rate": 9.424830459120974e-05,
+ "loss": 0.4796,
+ "step": 43150
+ },
+ {
+ "epoch": 0.22288606497747923,
+ "grad_norm": 21061.240234375,
+ "learning_rate": 9.423128232046223e-05,
+ "loss": 0.474,
+ "step": 43200
+ },
+ {
+ "epoch": 0.22314403496009205,
+ "grad_norm": 21862.25,
+ "learning_rate": 9.421423643991267e-05,
+ "loss": 0.4721,
+ "step": 43250
+ },
+ {
+ "epoch": 0.22340200494270487,
+ "grad_norm": 18299.23828125,
+ "learning_rate": 9.419716695865988e-05,
+ "loss": 0.4744,
+ "step": 43300
+ },
+ {
+ "epoch": 0.2236599749253177,
+ "grad_norm": 20387.876953125,
+ "learning_rate": 9.418007388581517e-05,
+ "loss": 0.4748,
+ "step": 43350
+ },
+ {
+ "epoch": 0.22391794490793052,
+ "grad_norm": 21721.740234375,
+ "learning_rate": 9.416295723050254e-05,
+ "loss": 0.4782,
+ "step": 43400
+ },
+ {
+ "epoch": 0.22417591489054334,
+ "grad_norm": 20274.72265625,
+ "learning_rate": 9.414581700185851e-05,
+ "loss": 0.4734,
+ "step": 43450
+ },
+ {
+ "epoch": 0.22443388487315616,
+ "grad_norm": 22443.296875,
+ "learning_rate": 9.41286532090322e-05,
+ "loss": 0.4734,
+ "step": 43500
+ },
+ {
+ "epoch": 0.22469185485576898,
+ "grad_norm": 19874.8203125,
+ "learning_rate": 9.411146586118529e-05,
+ "loss": 0.4755,
+ "step": 43550
+ },
+ {
+ "epoch": 0.2249498248383818,
+ "grad_norm": 20362.3125,
+ "learning_rate": 9.409425496749209e-05,
+ "loss": 0.4776,
+ "step": 43600
+ },
+ {
+ "epoch": 0.22520779482099462,
+ "grad_norm": 22146.5078125,
+ "learning_rate": 9.40770205371394e-05,
+ "loss": 0.4784,
+ "step": 43650
+ },
+ {
+ "epoch": 0.22546576480360744,
+ "grad_norm": 19917.83203125,
+ "learning_rate": 9.405976257932667e-05,
+ "loss": 0.4744,
+ "step": 43700
+ },
+ {
+ "epoch": 0.22572373478622026,
+ "grad_norm": 19296.904296875,
+ "learning_rate": 9.404248110326583e-05,
+ "loss": 0.4766,
+ "step": 43750
+ },
+ {
+ "epoch": 0.22598170476883309,
+ "grad_norm": 20648.35546875,
+ "learning_rate": 9.402517611818142e-05,
+ "loss": 0.4801,
+ "step": 43800
+ },
+ {
+ "epoch": 0.22623967475144593,
+ "grad_norm": 21750.517578125,
+ "learning_rate": 9.40078476333105e-05,
+ "loss": 0.4752,
+ "step": 43850
+ },
+ {
+ "epoch": 0.22649764473405876,
+ "grad_norm": 21233.337890625,
+ "learning_rate": 9.399049565790266e-05,
+ "loss": 0.4758,
+ "step": 43900
+ },
+ {
+ "epoch": 0.22675561471667158,
+ "grad_norm": 21952.6796875,
+ "learning_rate": 9.397312020122006e-05,
+ "loss": 0.4755,
+ "step": 43950
+ },
+ {
+ "epoch": 0.2270135846992844,
+ "grad_norm": 18598.826171875,
+ "learning_rate": 9.39557212725374e-05,
+ "loss": 0.4725,
+ "step": 44000
+ },
+ {
+ "epoch": 0.22727155468189722,
+ "grad_norm": 20325.51171875,
+ "learning_rate": 9.393829888114188e-05,
+ "loss": 0.4789,
+ "step": 44050
+ },
+ {
+ "epoch": 0.22752952466451004,
+ "grad_norm": 17499.228515625,
+ "learning_rate": 9.392085303633323e-05,
+ "loss": 0.4738,
+ "step": 44100
+ },
+ {
+ "epoch": 0.22778749464712286,
+ "grad_norm": 21283.970703125,
+ "learning_rate": 9.39033837474237e-05,
+ "loss": 0.4743,
+ "step": 44150
+ },
+ {
+ "epoch": 0.22804546462973568,
+ "grad_norm": 19672.765625,
+ "learning_rate": 9.388589102373807e-05,
+ "loss": 0.4751,
+ "step": 44200
+ },
+ {
+ "epoch": 0.2283034346123485,
+ "grad_norm": 19722.314453125,
+ "learning_rate": 9.386837487461361e-05,
+ "loss": 0.4767,
+ "step": 44250
+ },
+ {
+ "epoch": 0.22856140459496133,
+ "grad_norm": 19948.154296875,
+ "learning_rate": 9.38508353094001e-05,
+ "loss": 0.4765,
+ "step": 44300
+ },
+ {
+ "epoch": 0.22881937457757415,
+ "grad_norm": 19880.611328125,
+ "learning_rate": 9.383327233745984e-05,
+ "loss": 0.4754,
+ "step": 44350
+ },
+ {
+ "epoch": 0.22907734456018697,
+ "grad_norm": 20052.91796875,
+ "learning_rate": 9.381568596816757e-05,
+ "loss": 0.4801,
+ "step": 44400
+ },
+ {
+ "epoch": 0.2293353145427998,
+ "grad_norm": 23129.869140625,
+ "learning_rate": 9.379807621091057e-05,
+ "loss": 0.4713,
+ "step": 44450
+ },
+ {
+ "epoch": 0.2295932845254126,
+ "grad_norm": 19922.0703125,
+ "learning_rate": 9.37804430750886e-05,
+ "loss": 0.4736,
+ "step": 44500
+ },
+ {
+ "epoch": 0.22985125450802546,
+ "grad_norm": 19704.24609375,
+ "learning_rate": 9.376278657011388e-05,
+ "loss": 0.4682,
+ "step": 44550
+ },
+ {
+ "epoch": 0.23010922449063828,
+ "grad_norm": 19080.125,
+ "learning_rate": 9.374510670541109e-05,
+ "loss": 0.4751,
+ "step": 44600
+ },
+ {
+ "epoch": 0.2303671944732511,
+ "grad_norm": 20858.388671875,
+ "learning_rate": 9.372740349041742e-05,
+ "loss": 0.4734,
+ "step": 44650
+ },
+ {
+ "epoch": 0.23062516445586392,
+ "grad_norm": 22074.056640625,
+ "learning_rate": 9.37096769345825e-05,
+ "loss": 0.4699,
+ "step": 44700
+ },
+ {
+ "epoch": 0.23088313443847674,
+ "grad_norm": 21852.623046875,
+ "learning_rate": 9.369192704736842e-05,
+ "loss": 0.47,
+ "step": 44750
+ },
+ {
+ "epoch": 0.23114110442108957,
+ "grad_norm": 20904.033203125,
+ "learning_rate": 9.367415383824974e-05,
+ "loss": 0.4736,
+ "step": 44800
+ },
+ {
+ "epoch": 0.2313990744037024,
+ "grad_norm": 18965.021484375,
+ "learning_rate": 9.365635731671343e-05,
+ "loss": 0.4687,
+ "step": 44850
+ },
+ {
+ "epoch": 0.2316570443863152,
+ "grad_norm": 16994.271484375,
+ "learning_rate": 9.363853749225894e-05,
+ "loss": 0.4747,
+ "step": 44900
+ },
+ {
+ "epoch": 0.23191501436892803,
+ "grad_norm": 19191.794921875,
+ "learning_rate": 9.362069437439814e-05,
+ "loss": 0.4689,
+ "step": 44950
+ },
+ {
+ "epoch": 0.23217298435154085,
+ "grad_norm": 19691.982421875,
+ "learning_rate": 9.360282797265537e-05,
+ "loss": 0.4683,
+ "step": 45000
+ },
+ {
+ "epoch": 0.23217298435154085,
+ "eval_loss": 0.4633353352546692,
+ "eval_runtime": 3256.5731,
+ "eval_samples_per_second": 952.265,
+ "eval_steps_per_second": 1.86,
+ "step": 45000
+ },
+ {
+ "epoch": 0.23243095433415367,
+ "grad_norm": 21778.20703125,
+ "learning_rate": 9.358493829656732e-05,
+ "loss": 0.4726,
+ "step": 45050
+ },
+ {
+ "epoch": 0.2326889243167665,
+ "grad_norm": 20281.802734375,
+ "learning_rate": 9.35670253556832e-05,
+ "loss": 0.4752,
+ "step": 45100
+ },
+ {
+ "epoch": 0.23294689429937931,
+ "grad_norm": 20620.580078125,
+ "learning_rate": 9.354908915956456e-05,
+ "loss": 0.474,
+ "step": 45150
+ },
+ {
+ "epoch": 0.23320486428199214,
+ "grad_norm": 21115.86328125,
+ "learning_rate": 9.353112971778542e-05,
+ "loss": 0.4763,
+ "step": 45200
+ },
+ {
+ "epoch": 0.23346283426460496,
+ "grad_norm": 19746.30859375,
+ "learning_rate": 9.351314703993215e-05,
+ "loss": 0.4792,
+ "step": 45250
+ },
+ {
+ "epoch": 0.2337208042472178,
+ "grad_norm": 21270.26171875,
+ "learning_rate": 9.349514113560358e-05,
+ "loss": 0.4726,
+ "step": 45300
+ },
+ {
+ "epoch": 0.23397877422983063,
+ "grad_norm": 20273.658203125,
+ "learning_rate": 9.347711201441092e-05,
+ "loss": 0.4683,
+ "step": 45350
+ },
+ {
+ "epoch": 0.23423674421244345,
+ "grad_norm": 19746.9609375,
+ "learning_rate": 9.345905968597773e-05,
+ "loss": 0.4778,
+ "step": 45400
+ },
+ {
+ "epoch": 0.23449471419505627,
+ "grad_norm": 22999.52734375,
+ "learning_rate": 9.344098415994003e-05,
+ "loss": 0.4799,
+ "step": 45450
+ },
+ {
+ "epoch": 0.2347526841776691,
+ "grad_norm": 19922.41015625,
+ "learning_rate": 9.342288544594617e-05,
+ "loss": 0.4773,
+ "step": 45500
+ },
+ {
+ "epoch": 0.2350106541602819,
+ "grad_norm": 19793.73828125,
+ "learning_rate": 9.340476355365688e-05,
+ "loss": 0.4743,
+ "step": 45550
+ },
+ {
+ "epoch": 0.23526862414289473,
+ "grad_norm": 19525.74609375,
+ "learning_rate": 9.33866184927453e-05,
+ "loss": 0.4729,
+ "step": 45600
+ },
+ {
+ "epoch": 0.23552659412550755,
+ "grad_norm": 26093.65625,
+ "learning_rate": 9.336845027289691e-05,
+ "loss": 0.4767,
+ "step": 45650
+ },
+ {
+ "epoch": 0.23578456410812038,
+ "grad_norm": 20045.16796875,
+ "learning_rate": 9.335025890380953e-05,
+ "loss": 0.4768,
+ "step": 45700
+ },
+ {
+ "epoch": 0.2360425340907332,
+ "grad_norm": 21272.36328125,
+ "learning_rate": 9.333204439519338e-05,
+ "loss": 0.4738,
+ "step": 45750
+ },
+ {
+ "epoch": 0.23630050407334602,
+ "grad_norm": 19174.44921875,
+ "learning_rate": 9.3313806756771e-05,
+ "loss": 0.4752,
+ "step": 45800
+ },
+ {
+ "epoch": 0.23655847405595884,
+ "grad_norm": 18446.640625,
+ "learning_rate": 9.32955459982773e-05,
+ "loss": 0.4747,
+ "step": 45850
+ },
+ {
+ "epoch": 0.23681644403857166,
+ "grad_norm": 23397.7109375,
+ "learning_rate": 9.327726212945953e-05,
+ "loss": 0.4723,
+ "step": 45900
+ },
+ {
+ "epoch": 0.23707441402118448,
+ "grad_norm": 20350.755859375,
+ "learning_rate": 9.325895516007725e-05,
+ "loss": 0.4671,
+ "step": 45950
+ },
+ {
+ "epoch": 0.23733238400379733,
+ "grad_norm": 21147.5546875,
+ "learning_rate": 9.324062509990235e-05,
+ "loss": 0.4689,
+ "step": 46000
+ },
+ {
+ "epoch": 0.23759035398641015,
+ "grad_norm": 19813.130859375,
+ "learning_rate": 9.322227195871909e-05,
+ "loss": 0.4723,
+ "step": 46050
+ },
+ {
+ "epoch": 0.23784832396902297,
+ "grad_norm": 22310.037109375,
+ "learning_rate": 9.320389574632399e-05,
+ "loss": 0.4727,
+ "step": 46100
+ },
+ {
+ "epoch": 0.2381062939516358,
+ "grad_norm": 19646.509765625,
+ "learning_rate": 9.318549647252596e-05,
+ "loss": 0.4723,
+ "step": 46150
+ },
+ {
+ "epoch": 0.23836426393424862,
+ "grad_norm": 20145.29296875,
+ "learning_rate": 9.316707414714614e-05,
+ "loss": 0.4652,
+ "step": 46200
+ },
+ {
+ "epoch": 0.23862223391686144,
+ "grad_norm": 19513.466796875,
+ "learning_rate": 9.314862878001803e-05,
+ "loss": 0.4774,
+ "step": 46250
+ },
+ {
+ "epoch": 0.23888020389947426,
+ "grad_norm": 20701.25390625,
+ "learning_rate": 9.313016038098739e-05,
+ "loss": 0.4721,
+ "step": 46300
+ },
+ {
+ "epoch": 0.23913817388208708,
+ "grad_norm": 18766.328125,
+ "learning_rate": 9.31116689599123e-05,
+ "loss": 0.4691,
+ "step": 46350
+ },
+ {
+ "epoch": 0.2393961438646999,
+ "grad_norm": 20925.5,
+ "learning_rate": 9.309315452666314e-05,
+ "loss": 0.4743,
+ "step": 46400
+ },
+ {
+ "epoch": 0.23965411384731272,
+ "grad_norm": 19413.0703125,
+ "learning_rate": 9.307461709112253e-05,
+ "loss": 0.469,
+ "step": 46450
+ },
+ {
+ "epoch": 0.23991208382992554,
+ "grad_norm": 18517.669921875,
+ "learning_rate": 9.305605666318543e-05,
+ "loss": 0.4769,
+ "step": 46500
+ },
+ {
+ "epoch": 0.24017005381253836,
+ "grad_norm": 20222.50390625,
+ "learning_rate": 9.3037473252759e-05,
+ "loss": 0.4701,
+ "step": 46550
+ },
+ {
+ "epoch": 0.24042802379515119,
+ "grad_norm": 21650.63671875,
+ "learning_rate": 9.301886686976272e-05,
+ "loss": 0.4693,
+ "step": 46600
+ },
+ {
+ "epoch": 0.240685993777764,
+ "grad_norm": 18923.498046875,
+ "learning_rate": 9.300023752412832e-05,
+ "loss": 0.4749,
+ "step": 46650
+ },
+ {
+ "epoch": 0.24094396376037686,
+ "grad_norm": 21353.748046875,
+ "learning_rate": 9.298158522579978e-05,
+ "loss": 0.4735,
+ "step": 46700
+ },
+ {
+ "epoch": 0.24120193374298968,
+ "grad_norm": 19405.5234375,
+ "learning_rate": 9.296290998473334e-05,
+ "loss": 0.4708,
+ "step": 46750
+ },
+ {
+ "epoch": 0.2414599037256025,
+ "grad_norm": 21692.3203125,
+ "learning_rate": 9.294421181089747e-05,
+ "loss": 0.4644,
+ "step": 46800
+ },
+ {
+ "epoch": 0.24171787370821532,
+ "grad_norm": 18488.671875,
+ "learning_rate": 9.292549071427291e-05,
+ "loss": 0.4668,
+ "step": 46850
+ },
+ {
+ "epoch": 0.24197584369082814,
+ "grad_norm": 21951.712890625,
+ "learning_rate": 9.29067467048526e-05,
+ "loss": 0.4749,
+ "step": 46900
+ },
+ {
+ "epoch": 0.24223381367344096,
+ "grad_norm": 20673.82421875,
+ "learning_rate": 9.288797979264176e-05,
+ "loss": 0.4687,
+ "step": 46950
+ },
+ {
+ "epoch": 0.24249178365605378,
+ "grad_norm": 18687.69140625,
+ "learning_rate": 9.286918998765776e-05,
+ "loss": 0.4731,
+ "step": 47000
+ },
+ {
+ "epoch": 0.2427497536386666,
+ "grad_norm": 18882.009765625,
+ "learning_rate": 9.285037729993027e-05,
+ "loss": 0.4699,
+ "step": 47050
+ },
+ {
+ "epoch": 0.24300772362127943,
+ "grad_norm": 22378.685546875,
+ "learning_rate": 9.283154173950112e-05,
+ "loss": 0.4678,
+ "step": 47100
+ },
+ {
+ "epoch": 0.24326569360389225,
+ "grad_norm": 19457.736328125,
+ "learning_rate": 9.281268331642439e-05,
+ "loss": 0.4665,
+ "step": 47150
+ },
+ {
+ "epoch": 0.24352366358650507,
+ "grad_norm": 19794.4296875,
+ "learning_rate": 9.279380204076631e-05,
+ "loss": 0.4683,
+ "step": 47200
+ },
+ {
+ "epoch": 0.2437816335691179,
+ "grad_norm": 18910.41796875,
+ "learning_rate": 9.277489792260536e-05,
+ "loss": 0.4683,
+ "step": 47250
+ },
+ {
+ "epoch": 0.2440396035517307,
+ "grad_norm": 21774.009765625,
+ "learning_rate": 9.275597097203216e-05,
+ "loss": 0.4729,
+ "step": 47300
+ },
+ {
+ "epoch": 0.24429757353434353,
+ "grad_norm": 21403.1796875,
+ "learning_rate": 9.273702119914962e-05,
+ "loss": 0.4681,
+ "step": 47350
+ },
+ {
+ "epoch": 0.24455554351695638,
+ "grad_norm": 20333.400390625,
+ "learning_rate": 9.271804861407269e-05,
+ "loss": 0.4713,
+ "step": 47400
+ },
+ {
+ "epoch": 0.2448135134995692,
+ "grad_norm": 22196.32421875,
+ "learning_rate": 9.269905322692862e-05,
+ "loss": 0.468,
+ "step": 47450
+ },
+ {
+ "epoch": 0.24507148348218202,
+ "grad_norm": 18356.623046875,
+ "learning_rate": 9.268003504785673e-05,
+ "loss": 0.4663,
+ "step": 47500
+ },
+ {
+ "epoch": 0.24532945346479484,
+ "grad_norm": 20337.546875,
+ "learning_rate": 9.266099408700859e-05,
+ "loss": 0.4657,
+ "step": 47550
+ },
+ {
+ "epoch": 0.24558742344740767,
+ "grad_norm": 20426.03515625,
+ "learning_rate": 9.264193035454789e-05,
+ "loss": 0.4677,
+ "step": 47600
+ },
+ {
+ "epoch": 0.2458453934300205,
+ "grad_norm": 20962.81640625,
+ "learning_rate": 9.262284386065047e-05,
+ "loss": 0.4759,
+ "step": 47650
+ },
+ {
+ "epoch": 0.2461033634126333,
+ "grad_norm": 20498.919921875,
+ "learning_rate": 9.260373461550435e-05,
+ "loss": 0.4647,
+ "step": 47700
+ },
+ {
+ "epoch": 0.24636133339524613,
+ "grad_norm": 21223.171875,
+ "learning_rate": 9.258460262930967e-05,
+ "loss": 0.4698,
+ "step": 47750
+ },
+ {
+ "epoch": 0.24661930337785895,
+ "grad_norm": 21146.671875,
+ "learning_rate": 9.256544791227871e-05,
+ "loss": 0.4727,
+ "step": 47800
+ },
+ {
+ "epoch": 0.24687727336047177,
+ "grad_norm": 19261.603515625,
+ "learning_rate": 9.254627047463588e-05,
+ "loss": 0.4734,
+ "step": 47850
+ },
+ {
+ "epoch": 0.2471352433430846,
+ "grad_norm": 21131.298828125,
+ "learning_rate": 9.252707032661774e-05,
+ "loss": 0.4686,
+ "step": 47900
+ },
+ {
+ "epoch": 0.24739321332569741,
+ "grad_norm": 22491.212890625,
+ "learning_rate": 9.250784747847294e-05,
+ "loss": 0.4701,
+ "step": 47950
+ },
+ {
+ "epoch": 0.24765118330831024,
+ "grad_norm": 20198.486328125,
+ "learning_rate": 9.248860194046228e-05,
+ "loss": 0.4657,
+ "step": 48000
+ },
+ {
+ "epoch": 0.24790915329092306,
+ "grad_norm": 21754.078125,
+ "learning_rate": 9.246933372285863e-05,
+ "loss": 0.4674,
+ "step": 48050
+ },
+ {
+ "epoch": 0.24816712327353588,
+ "grad_norm": 20948.244140625,
+ "learning_rate": 9.245004283594703e-05,
+ "loss": 0.4604,
+ "step": 48100
+ },
+ {
+ "epoch": 0.24842509325614873,
+ "grad_norm": 20916.3671875,
+ "learning_rate": 9.243072929002454e-05,
+ "loss": 0.4656,
+ "step": 48150
+ },
+ {
+ "epoch": 0.24868306323876155,
+ "grad_norm": 19935.021484375,
+ "learning_rate": 9.24113930954004e-05,
+ "loss": 0.4735,
+ "step": 48200
+ },
+ {
+ "epoch": 0.24894103322137437,
+ "grad_norm": 20075.96875,
+ "learning_rate": 9.239203426239585e-05,
+ "loss": 0.4679,
+ "step": 48250
+ },
+ {
+ "epoch": 0.2491990032039872,
+ "grad_norm": 20107.943359375,
+ "learning_rate": 9.23726528013443e-05,
+ "loss": 0.4773,
+ "step": 48300
+ },
+ {
+ "epoch": 0.2494569731866,
+ "grad_norm": 20341.1171875,
+ "learning_rate": 9.235324872259119e-05,
+ "loss": 0.4699,
+ "step": 48350
+ },
+ {
+ "epoch": 0.24971494316921283,
+ "grad_norm": 21787.4296875,
+ "learning_rate": 9.233382203649401e-05,
+ "loss": 0.4665,
+ "step": 48400
+ },
+ {
+ "epoch": 0.24997291315182565,
+ "grad_norm": 17707.583984375,
+ "learning_rate": 9.231437275342239e-05,
+ "loss": 0.4678,
+ "step": 48450
+ },
+ {
+ "epoch": 0.2502308831344385,
+ "grad_norm": 24467.810546875,
+ "learning_rate": 9.229490088375797e-05,
+ "loss": 0.466,
+ "step": 48500
+ },
+ {
+ "epoch": 0.2504888531170513,
+ "grad_norm": 20794.73828125,
+ "learning_rate": 9.227540643789446e-05,
+ "loss": 0.4711,
+ "step": 48550
+ },
+ {
+ "epoch": 0.2507468230996641,
+ "grad_norm": 20147.099609375,
+ "learning_rate": 9.225588942623758e-05,
+ "loss": 0.4689,
+ "step": 48600
+ },
+ {
+ "epoch": 0.25100479308227697,
+ "grad_norm": 20704.037109375,
+ "learning_rate": 9.223634985920517e-05,
+ "loss": 0.4687,
+ "step": 48650
+ },
+ {
+ "epoch": 0.25126276306488976,
+ "grad_norm": 19472.21875,
+ "learning_rate": 9.221678774722707e-05,
+ "loss": 0.4636,
+ "step": 48700
+ },
+ {
+ "epoch": 0.2515207330475026,
+ "grad_norm": 21352.755859375,
+ "learning_rate": 9.219720310074515e-05,
+ "loss": 0.4671,
+ "step": 48750
+ },
+ {
+ "epoch": 0.2517787030301154,
+ "grad_norm": 20956.146484375,
+ "learning_rate": 9.21775959302133e-05,
+ "loss": 0.4703,
+ "step": 48800
+ },
+ {
+ "epoch": 0.25203667301272825,
+ "grad_norm": 26295.541015625,
+ "learning_rate": 9.215796624609749e-05,
+ "loss": 0.4742,
+ "step": 48850
+ },
+ {
+ "epoch": 0.25229464299534105,
+ "grad_norm": 19862.15625,
+ "learning_rate": 9.213831405887564e-05,
+ "loss": 0.468,
+ "step": 48900
+ },
+ {
+ "epoch": 0.2525526129779539,
+ "grad_norm": 21760.404296875,
+ "learning_rate": 9.211863937903769e-05,
+ "loss": 0.4728,
+ "step": 48950
+ },
+ {
+ "epoch": 0.2528105829605667,
+ "grad_norm": 22488.1484375,
+ "learning_rate": 9.209894221708564e-05,
+ "loss": 0.4627,
+ "step": 49000
+ },
+ {
+ "epoch": 0.25306855294317954,
+ "grad_norm": 20244.5,
+ "learning_rate": 9.20792225835334e-05,
+ "loss": 0.4706,
+ "step": 49050
+ },
+ {
+ "epoch": 0.25332652292579233,
+ "grad_norm": 22642.44140625,
+ "learning_rate": 9.205948048890698e-05,
+ "loss": 0.4708,
+ "step": 49100
+ },
+ {
+ "epoch": 0.2535844929084052,
+ "grad_norm": 23121.501953125,
+ "learning_rate": 9.203971594374432e-05,
+ "loss": 0.4723,
+ "step": 49150
+ },
+ {
+ "epoch": 0.25384246289101803,
+ "grad_norm": 19514.916015625,
+ "learning_rate": 9.201992895859532e-05,
+ "loss": 0.4692,
+ "step": 49200
+ },
+ {
+ "epoch": 0.2541004328736308,
+ "grad_norm": 19467.662109375,
+ "learning_rate": 9.200011954402193e-05,
+ "loss": 0.4719,
+ "step": 49250
+ },
+ {
+ "epoch": 0.25435840285624367,
+ "grad_norm": 20737.7578125,
+ "learning_rate": 9.198028771059799e-05,
+ "loss": 0.4643,
+ "step": 49300
+ },
+ {
+ "epoch": 0.25461637283885646,
+ "grad_norm": 20229.341796875,
+ "learning_rate": 9.196043346890939e-05,
+ "loss": 0.462,
+ "step": 49350
+ },
+ {
+ "epoch": 0.2548743428214693,
+ "grad_norm": 23094.35546875,
+ "learning_rate": 9.194055682955392e-05,
+ "loss": 0.4701,
+ "step": 49400
+ },
+ {
+ "epoch": 0.2551323128040821,
+ "grad_norm": 21099.541015625,
+ "learning_rate": 9.192065780314132e-05,
+ "loss": 0.466,
+ "step": 49450
+ },
+ {
+ "epoch": 0.25539028278669496,
+ "grad_norm": 21500.302734375,
+ "learning_rate": 9.190073640029335e-05,
+ "loss": 0.4703,
+ "step": 49500
+ },
+ {
+ "epoch": 0.25564825276930775,
+ "grad_norm": 24272.228515625,
+ "learning_rate": 9.188079263164366e-05,
+ "loss": 0.4672,
+ "step": 49550
+ },
+ {
+ "epoch": 0.2559062227519206,
+ "grad_norm": 21129.013671875,
+ "learning_rate": 9.186082650783783e-05,
+ "loss": 0.4715,
+ "step": 49600
+ },
+ {
+ "epoch": 0.2561641927345334,
+ "grad_norm": 20696.32421875,
+ "learning_rate": 9.184083803953339e-05,
+ "loss": 0.4646,
+ "step": 49650
+ },
+ {
+ "epoch": 0.25642216271714624,
+ "grad_norm": 20142.7890625,
+ "learning_rate": 9.18208272373998e-05,
+ "loss": 0.4627,
+ "step": 49700
+ },
+ {
+ "epoch": 0.25668013269975903,
+ "grad_norm": 18810.43359375,
+ "learning_rate": 9.180079411211847e-05,
+ "loss": 0.4659,
+ "step": 49750
+ },
+ {
+ "epoch": 0.2569381026823719,
+ "grad_norm": 23121.84765625,
+ "learning_rate": 9.178073867438264e-05,
+ "loss": 0.4683,
+ "step": 49800
+ },
+ {
+ "epoch": 0.2571960726649847,
+ "grad_norm": 20432.021484375,
+ "learning_rate": 9.176066093489755e-05,
+ "loss": 0.4704,
+ "step": 49850
+ },
+ {
+ "epoch": 0.2574540426475975,
+ "grad_norm": 22056.09765625,
+ "learning_rate": 9.17405609043803e-05,
+ "loss": 0.4753,
+ "step": 49900
+ },
+ {
+ "epoch": 0.2577120126302104,
+ "grad_norm": 21094.931640625,
+ "learning_rate": 9.17204385935599e-05,
+ "loss": 0.4648,
+ "step": 49950
+ },
+ {
+ "epoch": 0.25796998261282317,
+ "grad_norm": 20127.525390625,
+ "learning_rate": 9.170029401317725e-05,
+ "loss": 0.4646,
+ "step": 50000
+ },
+ {
+ "epoch": 0.25796998261282317,
+ "eval_loss": 0.4567689299583435,
+ "eval_runtime": 3268.0543,
+ "eval_samples_per_second": 948.919,
+ "eval_steps_per_second": 1.853,
+ "step": 50000
+ },
+ {
+ "epoch": 0.258227952595436,
+ "grad_norm": 20947.306640625,
+ "learning_rate": 9.168012717398516e-05,
+ "loss": 0.4688,
+ "step": 50050
+ },
+ {
+ "epoch": 0.2584859225780488,
+ "grad_norm": 23591.646484375,
+ "learning_rate": 9.165993808674823e-05,
+ "loss": 0.4683,
+ "step": 50100
+ },
+ {
+ "epoch": 0.25874389256066166,
+ "grad_norm": 21227.677734375,
+ "learning_rate": 9.163972676224306e-05,
+ "loss": 0.4671,
+ "step": 50150
+ },
+ {
+ "epoch": 0.25900186254327445,
+ "grad_norm": 20084.953125,
+ "learning_rate": 9.161949321125807e-05,
+ "loss": 0.4598,
+ "step": 50200
+ },
+ {
+ "epoch": 0.2592598325258873,
+ "grad_norm": 21139.5,
+ "learning_rate": 9.159923744459349e-05,
+ "loss": 0.4707,
+ "step": 50250
+ },
+ {
+ "epoch": 0.2595178025085001,
+ "grad_norm": 20410.794921875,
+ "learning_rate": 9.15789594730615e-05,
+ "loss": 0.4675,
+ "step": 50300
+ },
+ {
+ "epoch": 0.25977577249111294,
+ "grad_norm": 20010.328125,
+ "learning_rate": 9.155865930748608e-05,
+ "loss": 0.4599,
+ "step": 50350
+ },
+ {
+ "epoch": 0.26003374247372574,
+ "grad_norm": 23502.890625,
+ "learning_rate": 9.153833695870304e-05,
+ "loss": 0.4664,
+ "step": 50400
+ },
+ {
+ "epoch": 0.2602917124563386,
+ "grad_norm": 20373.498046875,
+ "learning_rate": 9.151799243756008e-05,
+ "loss": 0.4655,
+ "step": 50450
+ },
+ {
+ "epoch": 0.2605496824389514,
+ "grad_norm": 21093.669921875,
+ "learning_rate": 9.149762575491671e-05,
+ "loss": 0.4623,
+ "step": 50500
+ },
+ {
+ "epoch": 0.26080765242156423,
+ "grad_norm": 22206.87890625,
+ "learning_rate": 9.147723692164427e-05,
+ "loss": 0.4687,
+ "step": 50550
+ },
+ {
+ "epoch": 0.261065622404177,
+ "grad_norm": 23264.875,
+ "learning_rate": 9.145682594862593e-05,
+ "loss": 0.4705,
+ "step": 50600
+ },
+ {
+ "epoch": 0.26132359238678987,
+ "grad_norm": 22029.849609375,
+ "learning_rate": 9.143639284675664e-05,
+ "loss": 0.4673,
+ "step": 50650
+ },
+ {
+ "epoch": 0.2615815623694027,
+ "grad_norm": 23016.955078125,
+ "learning_rate": 9.141593762694323e-05,
+ "loss": 0.4663,
+ "step": 50700
+ },
+ {
+ "epoch": 0.2618395323520155,
+ "grad_norm": 21590.80859375,
+ "learning_rate": 9.139546030010427e-05,
+ "loss": 0.4684,
+ "step": 50750
+ },
+ {
+ "epoch": 0.26209750233462836,
+ "grad_norm": 19839.986328125,
+ "learning_rate": 9.13749608771702e-05,
+ "loss": 0.4682,
+ "step": 50800
+ },
+ {
+ "epoch": 0.26235547231724116,
+ "grad_norm": 17922.802734375,
+ "learning_rate": 9.135443936908318e-05,
+ "loss": 0.4601,
+ "step": 50850
+ },
+ {
+ "epoch": 0.262613442299854,
+ "grad_norm": 21141.119140625,
+ "learning_rate": 9.133389578679723e-05,
+ "loss": 0.467,
+ "step": 50900
+ },
+ {
+ "epoch": 0.2628714122824668,
+ "grad_norm": 21858.158203125,
+ "learning_rate": 9.131333014127806e-05,
+ "loss": 0.4663,
+ "step": 50950
+ },
+ {
+ "epoch": 0.26312938226507965,
+ "grad_norm": 21516.46875,
+ "learning_rate": 9.129274244350326e-05,
+ "loss": 0.4656,
+ "step": 51000
+ },
+ {
+ "epoch": 0.26338735224769244,
+ "grad_norm": 21403.263671875,
+ "learning_rate": 9.127213270446213e-05,
+ "loss": 0.4717,
+ "step": 51050
+ },
+ {
+ "epoch": 0.2636453222303053,
+ "grad_norm": 20405.4296875,
+ "learning_rate": 9.125150093515575e-05,
+ "loss": 0.4656,
+ "step": 51100
+ },
+ {
+ "epoch": 0.2639032922129181,
+ "grad_norm": 21057.57421875,
+ "learning_rate": 9.123084714659698e-05,
+ "loss": 0.4655,
+ "step": 51150
+ },
+ {
+ "epoch": 0.26416126219553093,
+ "grad_norm": 19891.15234375,
+ "learning_rate": 9.121017134981036e-05,
+ "loss": 0.4706,
+ "step": 51200
+ },
+ {
+ "epoch": 0.2644192321781437,
+ "grad_norm": 20441.30078125,
+ "learning_rate": 9.118947355583228e-05,
+ "loss": 0.4707,
+ "step": 51250
+ },
+ {
+ "epoch": 0.2646772021607566,
+ "grad_norm": 22182.67578125,
+ "learning_rate": 9.11687537757108e-05,
+ "loss": 0.4633,
+ "step": 51300
+ },
+ {
+ "epoch": 0.2649351721433694,
+ "grad_norm": 18211.728515625,
+ "learning_rate": 9.114801202050574e-05,
+ "loss": 0.4677,
+ "step": 51350
+ },
+ {
+ "epoch": 0.2651931421259822,
+ "grad_norm": 20691.697265625,
+ "learning_rate": 9.112724830128865e-05,
+ "loss": 0.4634,
+ "step": 51400
+ },
+ {
+ "epoch": 0.26545111210859507,
+ "grad_norm": 19717.75390625,
+ "learning_rate": 9.110646262914279e-05,
+ "loss": 0.4647,
+ "step": 51450
+ },
+ {
+ "epoch": 0.26570908209120786,
+ "grad_norm": 19860.55078125,
+ "learning_rate": 9.108565501516318e-05,
+ "loss": 0.4665,
+ "step": 51500
+ },
+ {
+ "epoch": 0.2659670520738207,
+ "grad_norm": 20122.984375,
+ "learning_rate": 9.106482547045648e-05,
+ "loss": 0.4663,
+ "step": 51550
+ },
+ {
+ "epoch": 0.2662250220564335,
+ "grad_norm": 21214.724609375,
+ "learning_rate": 9.104397400614112e-05,
+ "loss": 0.4676,
+ "step": 51600
+ },
+ {
+ "epoch": 0.26648299203904635,
+ "grad_norm": 24545.041015625,
+ "learning_rate": 9.102310063334722e-05,
+ "loss": 0.4705,
+ "step": 51650
+ },
+ {
+ "epoch": 0.26674096202165914,
+ "grad_norm": 22479.380859375,
+ "learning_rate": 9.100220536321655e-05,
+ "loss": 0.4616,
+ "step": 51700
+ },
+ {
+ "epoch": 0.266998932004272,
+ "grad_norm": 20262.27734375,
+ "learning_rate": 9.098128820690264e-05,
+ "loss": 0.4569,
+ "step": 51750
+ },
+ {
+ "epoch": 0.2672569019868848,
+ "grad_norm": 20906.880859375,
+ "learning_rate": 9.096034917557062e-05,
+ "loss": 0.468,
+ "step": 51800
+ },
+ {
+ "epoch": 0.26751487196949764,
+ "grad_norm": 20986.455078125,
+ "learning_rate": 9.093938828039737e-05,
+ "loss": 0.4697,
+ "step": 51850
+ },
+ {
+ "epoch": 0.26777284195211043,
+ "grad_norm": 22425.681640625,
+ "learning_rate": 9.09184055325714e-05,
+ "loss": 0.4692,
+ "step": 51900
+ },
+ {
+ "epoch": 0.2680308119347233,
+ "grad_norm": 21817.744140625,
+ "learning_rate": 9.089740094329288e-05,
+ "loss": 0.4726,
+ "step": 51950
+ },
+ {
+ "epoch": 0.26828878191733607,
+ "grad_norm": 20527.017578125,
+ "learning_rate": 9.087637452377369e-05,
+ "loss": 0.459,
+ "step": 52000
+ },
+ {
+ "epoch": 0.2685467518999489,
+ "grad_norm": 24486.521484375,
+ "learning_rate": 9.08553262852373e-05,
+ "loss": 0.4624,
+ "step": 52050
+ },
+ {
+ "epoch": 0.26880472188256177,
+ "grad_norm": 20964.537109375,
+ "learning_rate": 9.083425623891885e-05,
+ "loss": 0.4657,
+ "step": 52100
+ },
+ {
+ "epoch": 0.26906269186517456,
+ "grad_norm": 20966.478515625,
+ "learning_rate": 9.081316439606513e-05,
+ "loss": 0.4723,
+ "step": 52150
+ },
+ {
+ "epoch": 0.2693206618477874,
+ "grad_norm": 20067.330078125,
+ "learning_rate": 9.079205076793457e-05,
+ "loss": 0.4644,
+ "step": 52200
+ },
+ {
+ "epoch": 0.2695786318304002,
+ "grad_norm": 21526.298828125,
+ "learning_rate": 9.077091536579719e-05,
+ "loss": 0.4602,
+ "step": 52250
+ },
+ {
+ "epoch": 0.26983660181301306,
+ "grad_norm": 20446.767578125,
+ "learning_rate": 9.074975820093468e-05,
+ "loss": 0.4671,
+ "step": 52300
+ },
+ {
+ "epoch": 0.27009457179562585,
+ "grad_norm": 19936.599609375,
+ "learning_rate": 9.072857928464029e-05,
+ "loss": 0.4626,
+ "step": 52350
+ },
+ {
+ "epoch": 0.2703525417782387,
+ "grad_norm": 21716.60546875,
+ "learning_rate": 9.070737862821896e-05,
+ "loss": 0.4642,
+ "step": 52400
+ },
+ {
+ "epoch": 0.2706105117608515,
+ "grad_norm": 17588.40625,
+ "learning_rate": 9.068615624298717e-05,
+ "loss": 0.4595,
+ "step": 52450
+ },
+ {
+ "epoch": 0.27086848174346434,
+ "grad_norm": 21721.138671875,
+ "learning_rate": 9.066491214027302e-05,
+ "loss": 0.4639,
+ "step": 52500
+ },
+ {
+ "epoch": 0.27112645172607713,
+ "grad_norm": 19480.875,
+ "learning_rate": 9.06436463314162e-05,
+ "loss": 0.4654,
+ "step": 52550
+ },
+ {
+ "epoch": 0.27138442170869,
+ "grad_norm": 22658.076171875,
+ "learning_rate": 9.062235882776797e-05,
+ "loss": 0.4653,
+ "step": 52600
+ },
+ {
+ "epoch": 0.2716423916913028,
+ "grad_norm": 22396.4140625,
+ "learning_rate": 9.060104964069121e-05,
+ "loss": 0.4634,
+ "step": 52650
+ },
+ {
+ "epoch": 0.2719003616739156,
+ "grad_norm": 22354.28125,
+ "learning_rate": 9.057971878156036e-05,
+ "loss": 0.4626,
+ "step": 52700
+ },
+ {
+ "epoch": 0.2721583316565285,
+ "grad_norm": 19845.22265625,
+ "learning_rate": 9.05583662617614e-05,
+ "loss": 0.4666,
+ "step": 52750
+ },
+ {
+ "epoch": 0.27241630163914127,
+ "grad_norm": 19933.978515625,
+ "learning_rate": 9.053699209269188e-05,
+ "loss": 0.4601,
+ "step": 52800
+ },
+ {
+ "epoch": 0.2726742716217541,
+ "grad_norm": 21288.86328125,
+ "learning_rate": 9.051559628576094e-05,
+ "loss": 0.4622,
+ "step": 52850
+ },
+ {
+ "epoch": 0.2729322416043669,
+ "grad_norm": 20604.05078125,
+ "learning_rate": 9.049417885238927e-05,
+ "loss": 0.4618,
+ "step": 52900
+ },
+ {
+ "epoch": 0.27319021158697976,
+ "grad_norm": 18641.544921875,
+ "learning_rate": 9.047273980400903e-05,
+ "loss": 0.46,
+ "step": 52950
+ },
+ {
+ "epoch": 0.27344818156959255,
+ "grad_norm": 22482.8125,
+ "learning_rate": 9.045127915206398e-05,
+ "loss": 0.4673,
+ "step": 53000
+ },
+ {
+ "epoch": 0.2737061515522054,
+ "grad_norm": 20967.9375,
+ "learning_rate": 9.042979690800943e-05,
+ "loss": 0.4607,
+ "step": 53050
+ },
+ {
+ "epoch": 0.2739641215348182,
+ "grad_norm": 22371.90234375,
+ "learning_rate": 9.040829308331216e-05,
+ "loss": 0.4624,
+ "step": 53100
+ },
+ {
+ "epoch": 0.27422209151743104,
+ "grad_norm": 19802.947265625,
+ "learning_rate": 9.03867676894505e-05,
+ "loss": 0.4542,
+ "step": 53150
+ },
+ {
+ "epoch": 0.27448006150004384,
+ "grad_norm": 21255.974609375,
+ "learning_rate": 9.03652207379143e-05,
+ "loss": 0.4636,
+ "step": 53200
+ },
+ {
+ "epoch": 0.2747380314826567,
+ "grad_norm": 21687.16796875,
+ "learning_rate": 9.034365224020489e-05,
+ "loss": 0.4626,
+ "step": 53250
+ },
+ {
+ "epoch": 0.2749960014652695,
+ "grad_norm": 21386.275390625,
+ "learning_rate": 9.032206220783512e-05,
+ "loss": 0.4659,
+ "step": 53300
+ },
+ {
+ "epoch": 0.27525397144788233,
+ "grad_norm": 19433.888671875,
+ "learning_rate": 9.030045065232935e-05,
+ "loss": 0.4585,
+ "step": 53350
+ },
+ {
+ "epoch": 0.2755119414304951,
+ "grad_norm": 20615.021484375,
+ "learning_rate": 9.027881758522339e-05,
+ "loss": 0.4619,
+ "step": 53400
+ },
+ {
+ "epoch": 0.27576991141310797,
+ "grad_norm": 20498.369140625,
+ "learning_rate": 9.025716301806454e-05,
+ "loss": 0.4658,
+ "step": 53450
+ },
+ {
+ "epoch": 0.2760278813957208,
+ "grad_norm": 20348.955078125,
+ "learning_rate": 9.023548696241162e-05,
+ "loss": 0.4637,
+ "step": 53500
+ },
+ {
+ "epoch": 0.2762858513783336,
+ "grad_norm": 18524.3203125,
+ "learning_rate": 9.021378942983487e-05,
+ "loss": 0.4636,
+ "step": 53550
+ },
+ {
+ "epoch": 0.27654382136094646,
+ "grad_norm": 20778.064453125,
+ "learning_rate": 9.019207043191602e-05,
+ "loss": 0.4604,
+ "step": 53600
+ },
+ {
+ "epoch": 0.27680179134355926,
+ "grad_norm": 19481.369140625,
+ "learning_rate": 9.017032998024823e-05,
+ "loss": 0.4629,
+ "step": 53650
+ },
+ {
+ "epoch": 0.2770597613261721,
+ "grad_norm": 20873.8515625,
+ "learning_rate": 9.014856808643617e-05,
+ "loss": 0.4647,
+ "step": 53700
+ },
+ {
+ "epoch": 0.2773177313087849,
+ "grad_norm": 21859.05078125,
+ "learning_rate": 9.012678476209591e-05,
+ "loss": 0.4621,
+ "step": 53750
+ },
+ {
+ "epoch": 0.27757570129139775,
+ "grad_norm": 20832.587890625,
+ "learning_rate": 9.010498001885492e-05,
+ "loss": 0.463,
+ "step": 53800
+ },
+ {
+ "epoch": 0.27783367127401054,
+ "grad_norm": 18435.703125,
+ "learning_rate": 9.00831538683522e-05,
+ "loss": 0.466,
+ "step": 53850
+ },
+ {
+ "epoch": 0.2780916412566234,
+ "grad_norm": 21496.61328125,
+ "learning_rate": 9.006130632223811e-05,
+ "loss": 0.4611,
+ "step": 53900
+ },
+ {
+ "epoch": 0.2783496112392362,
+ "grad_norm": 21796.873046875,
+ "learning_rate": 9.003943739217444e-05,
+ "loss": 0.4587,
+ "step": 53950
+ },
+ {
+ "epoch": 0.27860758122184903,
+ "grad_norm": 21053.099609375,
+ "learning_rate": 9.001754708983443e-05,
+ "loss": 0.4659,
+ "step": 54000
+ },
+ {
+ "epoch": 0.2788655512044618,
+ "grad_norm": 20332.98828125,
+ "learning_rate": 8.999563542690266e-05,
+ "loss": 0.4586,
+ "step": 54050
+ },
+ {
+ "epoch": 0.2791235211870747,
+ "grad_norm": 19829.93359375,
+ "learning_rate": 8.997370241507516e-05,
+ "loss": 0.4608,
+ "step": 54100
+ },
+ {
+ "epoch": 0.27938149116968747,
+ "grad_norm": 21215.3515625,
+ "learning_rate": 8.995174806605937e-05,
+ "loss": 0.4672,
+ "step": 54150
+ },
+ {
+ "epoch": 0.2796394611523003,
+ "grad_norm": 19068.890625,
+ "learning_rate": 8.992977239157408e-05,
+ "loss": 0.4637,
+ "step": 54200
+ },
+ {
+ "epoch": 0.27989743113491317,
+ "grad_norm": 20632.857421875,
+ "learning_rate": 8.99077754033495e-05,
+ "loss": 0.4615,
+ "step": 54250
+ },
+ {
+ "epoch": 0.28015540111752596,
+ "grad_norm": 20244.943359375,
+ "learning_rate": 8.988575711312714e-05,
+ "loss": 0.4665,
+ "step": 54300
+ },
+ {
+ "epoch": 0.2804133711001388,
+ "grad_norm": 21873.34375,
+ "learning_rate": 8.986371753266001e-05,
+ "loss": 0.4636,
+ "step": 54350
+ },
+ {
+ "epoch": 0.2806713410827516,
+ "grad_norm": 18075.001953125,
+ "learning_rate": 8.984165667371236e-05,
+ "loss": 0.4626,
+ "step": 54400
+ },
+ {
+ "epoch": 0.28092931106536445,
+ "grad_norm": 19815.0546875,
+ "learning_rate": 8.981957454805987e-05,
+ "loss": 0.4535,
+ "step": 54450
+ },
+ {
+ "epoch": 0.28118728104797724,
+ "grad_norm": 22713.48046875,
+ "learning_rate": 8.979747116748955e-05,
+ "loss": 0.4592,
+ "step": 54500
+ },
+ {
+ "epoch": 0.2814452510305901,
+ "grad_norm": 23360.1953125,
+ "learning_rate": 8.977534654379976e-05,
+ "loss": 0.4646,
+ "step": 54550
+ },
+ {
+ "epoch": 0.2817032210132029,
+ "grad_norm": 21626.36328125,
+ "learning_rate": 8.975320068880018e-05,
+ "loss": 0.4644,
+ "step": 54600
+ },
+ {
+ "epoch": 0.28196119099581574,
+ "grad_norm": 20061.873046875,
+ "learning_rate": 8.973103361431184e-05,
+ "loss": 0.4674,
+ "step": 54650
+ },
+ {
+ "epoch": 0.28221916097842853,
+ "grad_norm": 21295.0625,
+ "learning_rate": 8.970884533216713e-05,
+ "loss": 0.4674,
+ "step": 54700
+ },
+ {
+ "epoch": 0.2824771309610414,
+ "grad_norm": 19434.23828125,
+ "learning_rate": 8.968663585420967e-05,
+ "loss": 0.46,
+ "step": 54750
+ },
+ {
+ "epoch": 0.28273510094365417,
+ "grad_norm": 23654.849609375,
+ "learning_rate": 8.966440519229449e-05,
+ "loss": 0.4649,
+ "step": 54800
+ },
+ {
+ "epoch": 0.282993070926267,
+ "grad_norm": 22763.603515625,
+ "learning_rate": 8.964215335828787e-05,
+ "loss": 0.4578,
+ "step": 54850
+ },
+ {
+ "epoch": 0.28325104090887987,
+ "grad_norm": 23262.849609375,
+ "learning_rate": 8.961988036406741e-05,
+ "loss": 0.4674,
+ "step": 54900
+ },
+ {
+ "epoch": 0.28350901089149266,
+ "grad_norm": 20148.380859375,
+ "learning_rate": 8.959758622152201e-05,
+ "loss": 0.4642,
+ "step": 54950
+ },
+ {
+ "epoch": 0.2837669808741055,
+ "grad_norm": 22515.548828125,
+ "learning_rate": 8.957527094255186e-05,
+ "loss": 0.4697,
+ "step": 55000
+ },
+ {
+ "epoch": 0.2837669808741055,
+ "eval_loss": 0.4508056044578552,
+ "eval_runtime": 3347.9938,
+ "eval_samples_per_second": 926.262,
+ "eval_steps_per_second": 1.809,
+ "step": 55000
+ },
+ {
+ "epoch": 0.2840249508567183,
+ "grad_norm": 21158.09375,
+ "learning_rate": 8.95529345390684e-05,
+ "loss": 0.4617,
+ "step": 55050
+ },
+ {
+ "epoch": 0.28428292083933115,
+ "grad_norm": 20892.517578125,
+ "learning_rate": 8.953057702299437e-05,
+ "loss": 0.4612,
+ "step": 55100
+ },
+ {
+ "epoch": 0.28454089082194395,
+ "grad_norm": 21489.740234375,
+ "learning_rate": 8.950819840626381e-05,
+ "loss": 0.4578,
+ "step": 55150
+ },
+ {
+ "epoch": 0.2847988608045568,
+ "grad_norm": 20703.072265625,
+ "learning_rate": 8.948579870082197e-05,
+ "loss": 0.4632,
+ "step": 55200
+ },
+ {
+ "epoch": 0.2850568307871696,
+ "grad_norm": 21731.775390625,
+ "learning_rate": 8.946337791862537e-05,
+ "loss": 0.4621,
+ "step": 55250
+ },
+ {
+ "epoch": 0.28531480076978244,
+ "grad_norm": 24507.076171875,
+ "learning_rate": 8.94409360716418e-05,
+ "loss": 0.4542,
+ "step": 55300
+ },
+ {
+ "epoch": 0.28557277075239523,
+ "grad_norm": 20686.79296875,
+ "learning_rate": 8.94184731718503e-05,
+ "loss": 0.4575,
+ "step": 55350
+ },
+ {
+ "epoch": 0.2858307407350081,
+ "grad_norm": 20055.396484375,
+ "learning_rate": 8.93959892312411e-05,
+ "loss": 0.4595,
+ "step": 55400
+ },
+ {
+ "epoch": 0.2860887107176209,
+ "grad_norm": 21203.28515625,
+ "learning_rate": 8.93734842618157e-05,
+ "loss": 0.457,
+ "step": 55450
+ },
+ {
+ "epoch": 0.2863466807002337,
+ "grad_norm": 21738.6328125,
+ "learning_rate": 8.935095827558684e-05,
+ "loss": 0.4639,
+ "step": 55500
+ },
+ {
+ "epoch": 0.2866046506828465,
+ "grad_norm": 21593.056640625,
+ "learning_rate": 8.932841128457844e-05,
+ "loss": 0.4566,
+ "step": 55550
+ },
+ {
+ "epoch": 0.28686262066545937,
+ "grad_norm": 20362.564453125,
+ "learning_rate": 8.930584330082564e-05,
+ "loss": 0.4613,
+ "step": 55600
+ },
+ {
+ "epoch": 0.2871205906480722,
+ "grad_norm": 20415.390625,
+ "learning_rate": 8.928325433637482e-05,
+ "loss": 0.4591,
+ "step": 55650
+ },
+ {
+ "epoch": 0.287378560630685,
+ "grad_norm": 21615.1953125,
+ "learning_rate": 8.926064440328348e-05,
+ "loss": 0.4645,
+ "step": 55700
+ },
+ {
+ "epoch": 0.28763653061329786,
+ "grad_norm": 19537.873046875,
+ "learning_rate": 8.92380135136204e-05,
+ "loss": 0.4595,
+ "step": 55750
+ },
+ {
+ "epoch": 0.28789450059591065,
+ "grad_norm": 21288.21484375,
+ "learning_rate": 8.921536167946552e-05,
+ "loss": 0.4565,
+ "step": 55800
+ },
+ {
+ "epoch": 0.2881524705785235,
+ "grad_norm": 25019.783203125,
+ "learning_rate": 8.919268891290992e-05,
+ "loss": 0.4635,
+ "step": 55850
+ },
+ {
+ "epoch": 0.2884104405611363,
+ "grad_norm": 23099.5625,
+ "learning_rate": 8.916999522605592e-05,
+ "loss": 0.4561,
+ "step": 55900
+ },
+ {
+ "epoch": 0.28866841054374914,
+ "grad_norm": 22477.849609375,
+ "learning_rate": 8.914728063101694e-05,
+ "loss": 0.458,
+ "step": 55950
+ },
+ {
+ "epoch": 0.28892638052636194,
+ "grad_norm": 19823.103515625,
+ "learning_rate": 8.91245451399176e-05,
+ "loss": 0.457,
+ "step": 56000
+ },
+ {
+ "epoch": 0.2891843505089748,
+ "grad_norm": 20293.353515625,
+ "learning_rate": 8.910178876489368e-05,
+ "loss": 0.4614,
+ "step": 56050
+ },
+ {
+ "epoch": 0.2894423204915876,
+ "grad_norm": 19020.892578125,
+ "learning_rate": 8.907901151809205e-05,
+ "loss": 0.4597,
+ "step": 56100
+ },
+ {
+ "epoch": 0.28970029047420043,
+ "grad_norm": 20133.603515625,
+ "learning_rate": 8.905621341167082e-05,
+ "loss": 0.4577,
+ "step": 56150
+ },
+ {
+ "epoch": 0.2899582604568132,
+ "grad_norm": 21008.95703125,
+ "learning_rate": 8.903339445779915e-05,
+ "loss": 0.4596,
+ "step": 56200
+ },
+ {
+ "epoch": 0.29021623043942607,
+ "grad_norm": 21339.892578125,
+ "learning_rate": 8.901055466865735e-05,
+ "loss": 0.4631,
+ "step": 56250
+ },
+ {
+ "epoch": 0.29047420042203886,
+ "grad_norm": 20088.455078125,
+ "learning_rate": 8.898769405643686e-05,
+ "loss": 0.4571,
+ "step": 56300
+ },
+ {
+ "epoch": 0.2907321704046517,
+ "grad_norm": 21779.341796875,
+ "learning_rate": 8.896481263334023e-05,
+ "loss": 0.4541,
+ "step": 56350
+ },
+ {
+ "epoch": 0.29099014038726456,
+ "grad_norm": 24433.103515625,
+ "learning_rate": 8.894191041158113e-05,
+ "loss": 0.4627,
+ "step": 56400
+ },
+ {
+ "epoch": 0.29124811036987736,
+ "grad_norm": 22214.70703125,
+ "learning_rate": 8.891898740338432e-05,
+ "loss": 0.4585,
+ "step": 56450
+ },
+ {
+ "epoch": 0.2915060803524902,
+ "grad_norm": 20558.955078125,
+ "learning_rate": 8.889604362098567e-05,
+ "loss": 0.4547,
+ "step": 56500
+ },
+ {
+ "epoch": 0.291764050335103,
+ "grad_norm": 22438.3828125,
+ "learning_rate": 8.88730790766321e-05,
+ "loss": 0.4581,
+ "step": 56550
+ },
+ {
+ "epoch": 0.29202202031771585,
+ "grad_norm": 22429.658203125,
+ "learning_rate": 8.885009378258164e-05,
+ "loss": 0.4556,
+ "step": 56600
+ },
+ {
+ "epoch": 0.29227999030032864,
+ "grad_norm": 18076.814453125,
+ "learning_rate": 8.882708775110342e-05,
+ "loss": 0.4571,
+ "step": 56650
+ },
+ {
+ "epoch": 0.2925379602829415,
+ "grad_norm": 19816.873046875,
+ "learning_rate": 8.88040609944776e-05,
+ "loss": 0.4584,
+ "step": 56700
+ },
+ {
+ "epoch": 0.2927959302655543,
+ "grad_norm": 20448.5234375,
+ "learning_rate": 8.878101352499542e-05,
+ "loss": 0.4575,
+ "step": 56750
+ },
+ {
+ "epoch": 0.29305390024816713,
+ "grad_norm": 19950.4609375,
+ "learning_rate": 8.875794535495915e-05,
+ "loss": 0.4558,
+ "step": 56800
+ },
+ {
+ "epoch": 0.2933118702307799,
+ "grad_norm": 20185.0625,
+ "learning_rate": 8.873485649668218e-05,
+ "loss": 0.4523,
+ "step": 56850
+ },
+ {
+ "epoch": 0.2935698402133928,
+ "grad_norm": 22338.080078125,
+ "learning_rate": 8.871174696248888e-05,
+ "loss": 0.4648,
+ "step": 56900
+ },
+ {
+ "epoch": 0.29382781019600557,
+ "grad_norm": 22531.541015625,
+ "learning_rate": 8.868861676471463e-05,
+ "loss": 0.4628,
+ "step": 56950
+ },
+ {
+ "epoch": 0.2940857801786184,
+ "grad_norm": 19558.10546875,
+ "learning_rate": 8.866546591570592e-05,
+ "loss": 0.4565,
+ "step": 57000
+ },
+ {
+ "epoch": 0.29434375016123127,
+ "grad_norm": 20166.33203125,
+ "learning_rate": 8.864229442782023e-05,
+ "loss": 0.4527,
+ "step": 57050
+ },
+ {
+ "epoch": 0.29460172014384406,
+ "grad_norm": 20262.185546875,
+ "learning_rate": 8.861910231342603e-05,
+ "loss": 0.4575,
+ "step": 57100
+ },
+ {
+ "epoch": 0.2948596901264569,
+ "grad_norm": 19107.080078125,
+ "learning_rate": 8.859588958490283e-05,
+ "loss": 0.4564,
+ "step": 57150
+ },
+ {
+ "epoch": 0.2951176601090697,
+ "grad_norm": 19690.37109375,
+ "learning_rate": 8.857265625464113e-05,
+ "loss": 0.4576,
+ "step": 57200
+ },
+ {
+ "epoch": 0.29537563009168255,
+ "grad_norm": 21793.189453125,
+ "learning_rate": 8.854940233504245e-05,
+ "loss": 0.4616,
+ "step": 57250
+ },
+ {
+ "epoch": 0.29563360007429534,
+ "grad_norm": 21543.033203125,
+ "learning_rate": 8.852612783851926e-05,
+ "loss": 0.4559,
+ "step": 57300
+ },
+ {
+ "epoch": 0.2958915700569082,
+ "grad_norm": 21455.56640625,
+ "learning_rate": 8.850283277749504e-05,
+ "loss": 0.4583,
+ "step": 57350
+ },
+ {
+ "epoch": 0.296149540039521,
+ "grad_norm": 21236.935546875,
+ "learning_rate": 8.847951716440426e-05,
+ "loss": 0.46,
+ "step": 57400
+ },
+ {
+ "epoch": 0.29640751002213384,
+ "grad_norm": 22411.130859375,
+ "learning_rate": 8.845618101169232e-05,
+ "loss": 0.4563,
+ "step": 57450
+ },
+ {
+ "epoch": 0.29666548000474663,
+ "grad_norm": 19269.26171875,
+ "learning_rate": 8.843282433181561e-05,
+ "loss": 0.4634,
+ "step": 57500
+ },
+ {
+ "epoch": 0.2969234499873595,
+ "grad_norm": 22179.669921875,
+ "learning_rate": 8.840944713724149e-05,
+ "loss": 0.4582,
+ "step": 57550
+ },
+ {
+ "epoch": 0.29718141996997227,
+ "grad_norm": 19867.076171875,
+ "learning_rate": 8.838604944044825e-05,
+ "loss": 0.4591,
+ "step": 57600
+ },
+ {
+ "epoch": 0.2974393899525851,
+ "grad_norm": 19806.09375,
+ "learning_rate": 8.836263125392511e-05,
+ "loss": 0.4571,
+ "step": 57650
+ },
+ {
+ "epoch": 0.2976973599351979,
+ "grad_norm": 21762.22265625,
+ "learning_rate": 8.833919259017225e-05,
+ "loss": 0.4526,
+ "step": 57700
+ },
+ {
+ "epoch": 0.29795532991781076,
+ "grad_norm": 21031.263671875,
+ "learning_rate": 8.83157334617008e-05,
+ "loss": 0.4577,
+ "step": 57750
+ },
+ {
+ "epoch": 0.2982132999004236,
+ "grad_norm": 22886.556640625,
+ "learning_rate": 8.829225388103276e-05,
+ "loss": 0.4553,
+ "step": 57800
+ },
+ {
+ "epoch": 0.2984712698830364,
+ "grad_norm": 19710.173828125,
+ "learning_rate": 8.826875386070108e-05,
+ "loss": 0.4556,
+ "step": 57850
+ },
+ {
+ "epoch": 0.29872923986564925,
+ "grad_norm": 20607.244140625,
+ "learning_rate": 8.824523341324963e-05,
+ "loss": 0.458,
+ "step": 57900
+ },
+ {
+ "epoch": 0.29898720984826205,
+ "grad_norm": 20672.05859375,
+ "learning_rate": 8.822169255123317e-05,
+ "loss": 0.4531,
+ "step": 57950
+ },
+ {
+ "epoch": 0.2992451798308749,
+ "grad_norm": 21375.76953125,
+ "learning_rate": 8.819813128721732e-05,
+ "loss": 0.4602,
+ "step": 58000
+ },
+ {
+ "epoch": 0.2995031498134877,
+ "grad_norm": 20848.328125,
+ "learning_rate": 8.817454963377865e-05,
+ "loss": 0.4557,
+ "step": 58050
+ },
+ {
+ "epoch": 0.29976111979610054,
+ "grad_norm": 20778.619140625,
+ "learning_rate": 8.81509476035046e-05,
+ "loss": 0.4588,
+ "step": 58100
+ },
+ {
+ "epoch": 0.30001908977871333,
+ "grad_norm": 19791.296875,
+ "learning_rate": 8.812732520899347e-05,
+ "loss": 0.4609,
+ "step": 58150
+ },
+ {
+ "epoch": 0.3002770597613262,
+ "grad_norm": 21814.482421875,
+ "learning_rate": 8.810368246285445e-05,
+ "loss": 0.4597,
+ "step": 58200
+ },
+ {
+ "epoch": 0.300535029743939,
+ "grad_norm": 22417.65625,
+ "learning_rate": 8.808001937770755e-05,
+ "loss": 0.461,
+ "step": 58250
+ },
+ {
+ "epoch": 0.3007929997265518,
+ "grad_norm": 21347.53515625,
+ "learning_rate": 8.80563359661837e-05,
+ "loss": 0.4523,
+ "step": 58300
+ },
+ {
+ "epoch": 0.3010509697091646,
+ "grad_norm": 21612.689453125,
+ "learning_rate": 8.803263224092461e-05,
+ "loss": 0.4588,
+ "step": 58350
+ },
+ {
+ "epoch": 0.30130893969177747,
+ "grad_norm": 19139.7109375,
+ "learning_rate": 8.80089082145829e-05,
+ "loss": 0.4576,
+ "step": 58400
+ },
+ {
+ "epoch": 0.3015669096743903,
+ "grad_norm": 21629.78125,
+ "learning_rate": 8.798516389982197e-05,
+ "loss": 0.4514,
+ "step": 58450
+ },
+ {
+ "epoch": 0.3018248796570031,
+ "grad_norm": 20307.630859375,
+ "learning_rate": 8.79613993093161e-05,
+ "loss": 0.4606,
+ "step": 58500
+ },
+ {
+ "epoch": 0.30208284963961596,
+ "grad_norm": 17832.3359375,
+ "learning_rate": 8.793761445575037e-05,
+ "loss": 0.4654,
+ "step": 58550
+ },
+ {
+ "epoch": 0.30234081962222875,
+ "grad_norm": 19975.20703125,
+ "learning_rate": 8.791380935182065e-05,
+ "loss": 0.4519,
+ "step": 58600
+ },
+ {
+ "epoch": 0.3025987896048416,
+ "grad_norm": 23387.681640625,
+ "learning_rate": 8.788998401023365e-05,
+ "loss": 0.4576,
+ "step": 58650
+ },
+ {
+ "epoch": 0.3028567595874544,
+ "grad_norm": 18704.669921875,
+ "learning_rate": 8.78661384437069e-05,
+ "loss": 0.4634,
+ "step": 58700
+ },
+ {
+ "epoch": 0.30311472957006724,
+ "grad_norm": 21739.806640625,
+ "learning_rate": 8.784227266496868e-05,
+ "loss": 0.4471,
+ "step": 58750
+ },
+ {
+ "epoch": 0.30337269955268004,
+ "grad_norm": 22190.74609375,
+ "learning_rate": 8.781838668675806e-05,
+ "loss": 0.4508,
+ "step": 58800
+ },
+ {
+ "epoch": 0.3036306695352929,
+ "grad_norm": 19186.9609375,
+ "learning_rate": 8.779448052182495e-05,
+ "loss": 0.4575,
+ "step": 58850
+ },
+ {
+ "epoch": 0.3038886395179057,
+ "grad_norm": 21925.8984375,
+ "learning_rate": 8.777055418293e-05,
+ "loss": 0.4614,
+ "step": 58900
+ },
+ {
+ "epoch": 0.3041466095005185,
+ "grad_norm": 21280.16796875,
+ "learning_rate": 8.774660768284459e-05,
+ "loss": 0.4621,
+ "step": 58950
+ },
+ {
+ "epoch": 0.3044045794831313,
+ "grad_norm": 19872.3828125,
+ "learning_rate": 8.772264103435094e-05,
+ "loss": 0.4617,
+ "step": 59000
+ },
+ {
+ "epoch": 0.30466254946574417,
+ "grad_norm": 17518.58984375,
+ "learning_rate": 8.769865425024195e-05,
+ "loss": 0.4548,
+ "step": 59050
+ },
+ {
+ "epoch": 0.30492051944835696,
+ "grad_norm": 25605.537109375,
+ "learning_rate": 8.767464734332131e-05,
+ "loss": 0.4532,
+ "step": 59100
+ },
+ {
+ "epoch": 0.3051784894309698,
+ "grad_norm": 20151.53515625,
+ "learning_rate": 8.765062032640346e-05,
+ "loss": 0.4558,
+ "step": 59150
+ },
+ {
+ "epoch": 0.30543645941358266,
+ "grad_norm": 19346.048828125,
+ "learning_rate": 8.762657321231353e-05,
+ "loss": 0.4624,
+ "step": 59200
+ },
+ {
+ "epoch": 0.30569442939619546,
+ "grad_norm": 21447.115234375,
+ "learning_rate": 8.760250601388741e-05,
+ "loss": 0.4632,
+ "step": 59250
+ },
+ {
+ "epoch": 0.3059523993788083,
+ "grad_norm": 19053.896484375,
+ "learning_rate": 8.757841874397172e-05,
+ "loss": 0.454,
+ "step": 59300
+ },
+ {
+ "epoch": 0.3062103693614211,
+ "grad_norm": 20928.8515625,
+ "learning_rate": 8.755431141542376e-05,
+ "loss": 0.4509,
+ "step": 59350
+ },
+ {
+ "epoch": 0.30646833934403395,
+ "grad_norm": 20900.40234375,
+ "learning_rate": 8.753018404111157e-05,
+ "loss": 0.4523,
+ "step": 59400
+ },
+ {
+ "epoch": 0.30672630932664674,
+ "grad_norm": 19776.572265625,
+ "learning_rate": 8.750603663391385e-05,
+ "loss": 0.458,
+ "step": 59450
+ },
+ {
+ "epoch": 0.3069842793092596,
+ "grad_norm": 21503.505859375,
+ "learning_rate": 8.748186920672005e-05,
+ "loss": 0.4496,
+ "step": 59500
+ },
+ {
+ "epoch": 0.3072422492918724,
+ "grad_norm": 20588.5078125,
+ "learning_rate": 8.745768177243027e-05,
+ "loss": 0.4578,
+ "step": 59550
+ },
+ {
+ "epoch": 0.30750021927448523,
+ "grad_norm": 20516.150390625,
+ "learning_rate": 8.743347434395528e-05,
+ "loss": 0.46,
+ "step": 59600
+ },
+ {
+ "epoch": 0.307758189257098,
+ "grad_norm": 20487.498046875,
+ "learning_rate": 8.740924693421655e-05,
+ "loss": 0.4574,
+ "step": 59650
+ },
+ {
+ "epoch": 0.3080161592397109,
+ "grad_norm": 21070.3671875,
+ "learning_rate": 8.738499955614619e-05,
+ "loss": 0.4564,
+ "step": 59700
+ },
+ {
+ "epoch": 0.30827412922232367,
+ "grad_norm": 19067.427734375,
+ "learning_rate": 8.736073222268697e-05,
+ "loss": 0.4523,
+ "step": 59750
+ },
+ {
+ "epoch": 0.3085320992049365,
+ "grad_norm": 22084.68359375,
+ "learning_rate": 8.733644494679236e-05,
+ "loss": 0.4558,
+ "step": 59800
+ },
+ {
+ "epoch": 0.3087900691875493,
+ "grad_norm": 22324.9140625,
+ "learning_rate": 8.731213774142639e-05,
+ "loss": 0.4585,
+ "step": 59850
+ },
+ {
+ "epoch": 0.30904803917016216,
+ "grad_norm": 19219.47265625,
+ "learning_rate": 8.728781061956383e-05,
+ "loss": 0.4571,
+ "step": 59900
+ },
+ {
+ "epoch": 0.309306009152775,
+ "grad_norm": 20598.125,
+ "learning_rate": 8.726346359418998e-05,
+ "loss": 0.4581,
+ "step": 59950
+ },
+ {
+ "epoch": 0.3095639791353878,
+ "grad_norm": 22155.720703125,
+ "learning_rate": 8.723909667830082e-05,
+ "loss": 0.4578,
+ "step": 60000
+ },
+ {
+ "epoch": 0.3095639791353878,
+ "eval_loss": 0.44494956731796265,
+ "eval_runtime": 3261.5111,
+ "eval_samples_per_second": 950.823,
+ "eval_steps_per_second": 1.857,
+ "step": 60000
+ },
+ {
+ "epoch": 0.30982194911800065,
+ "grad_norm": 22012.822265625,
+ "learning_rate": 8.721470988490297e-05,
+ "loss": 0.4533,
+ "step": 60050
+ },
+ {
+ "epoch": 0.31007991910061344,
+ "grad_norm": 20934.453125,
+ "learning_rate": 8.719030322701358e-05,
+ "loss": 0.4538,
+ "step": 60100
+ },
+ {
+ "epoch": 0.3103378890832263,
+ "grad_norm": 20173.20703125,
+ "learning_rate": 8.716587671766049e-05,
+ "loss": 0.4559,
+ "step": 60150
+ },
+ {
+ "epoch": 0.3105958590658391,
+ "grad_norm": 19343.833984375,
+ "learning_rate": 8.714143036988208e-05,
+ "loss": 0.4579,
+ "step": 60200
+ },
+ {
+ "epoch": 0.31085382904845194,
+ "grad_norm": 20720.435546875,
+ "learning_rate": 8.711696419672734e-05,
+ "loss": 0.4529,
+ "step": 60250
+ },
+ {
+ "epoch": 0.31111179903106473,
+ "grad_norm": 22050.85546875,
+ "learning_rate": 8.709247821125583e-05,
+ "loss": 0.4505,
+ "step": 60300
+ },
+ {
+ "epoch": 0.3113697690136776,
+ "grad_norm": 22470.55078125,
+ "learning_rate": 8.706797242653773e-05,
+ "loss": 0.4616,
+ "step": 60350
+ },
+ {
+ "epoch": 0.31162773899629037,
+ "grad_norm": 21057.978515625,
+ "learning_rate": 8.70434468556537e-05,
+ "loss": 0.4568,
+ "step": 60400
+ },
+ {
+ "epoch": 0.3118857089789032,
+ "grad_norm": 21035.34375,
+ "learning_rate": 8.701890151169507e-05,
+ "loss": 0.4551,
+ "step": 60450
+ },
+ {
+ "epoch": 0.312143678961516,
+ "grad_norm": 20412.056640625,
+ "learning_rate": 8.699433640776363e-05,
+ "loss": 0.4521,
+ "step": 60500
+ },
+ {
+ "epoch": 0.31240164894412886,
+ "grad_norm": 19888.26953125,
+ "learning_rate": 8.696975155697175e-05,
+ "loss": 0.4565,
+ "step": 60550
+ },
+ {
+ "epoch": 0.3126596189267417,
+ "grad_norm": 22491.900390625,
+ "learning_rate": 8.694514697244238e-05,
+ "loss": 0.4578,
+ "step": 60600
+ },
+ {
+ "epoch": 0.3129175889093545,
+ "grad_norm": 20026.357421875,
+ "learning_rate": 8.692052266730897e-05,
+ "loss": 0.4554,
+ "step": 60650
+ },
+ {
+ "epoch": 0.31317555889196735,
+ "grad_norm": 22979.109375,
+ "learning_rate": 8.689587865471547e-05,
+ "loss": 0.461,
+ "step": 60700
+ },
+ {
+ "epoch": 0.31343352887458015,
+ "grad_norm": 21558.291015625,
+ "learning_rate": 8.68712149478164e-05,
+ "loss": 0.4546,
+ "step": 60750
+ },
+ {
+ "epoch": 0.313691498857193,
+ "grad_norm": 22115.384765625,
+ "learning_rate": 8.684653155977676e-05,
+ "loss": 0.4518,
+ "step": 60800
+ },
+ {
+ "epoch": 0.3139494688398058,
+ "grad_norm": 21422.41015625,
+ "learning_rate": 8.682182850377205e-05,
+ "loss": 0.4602,
+ "step": 60850
+ },
+ {
+ "epoch": 0.31420743882241864,
+ "grad_norm": 21101.02734375,
+ "learning_rate": 8.679710579298832e-05,
+ "loss": 0.4579,
+ "step": 60900
+ },
+ {
+ "epoch": 0.31446540880503143,
+ "grad_norm": 18844.361328125,
+ "learning_rate": 8.677236344062203e-05,
+ "loss": 0.4569,
+ "step": 60950
+ },
+ {
+ "epoch": 0.3147233787876443,
+ "grad_norm": 20492.796875,
+ "learning_rate": 8.67476014598802e-05,
+ "loss": 0.4542,
+ "step": 61000
+ },
+ {
+ "epoch": 0.3149813487702571,
+ "grad_norm": 28102.55078125,
+ "learning_rate": 8.67228198639803e-05,
+ "loss": 0.4516,
+ "step": 61050
+ },
+ {
+ "epoch": 0.3152393187528699,
+ "grad_norm": 20697.494140625,
+ "learning_rate": 8.669801866615024e-05,
+ "loss": 0.4551,
+ "step": 61100
+ },
+ {
+ "epoch": 0.3154972887354827,
+ "grad_norm": 20726.90625,
+ "learning_rate": 8.667319787962842e-05,
+ "loss": 0.4576,
+ "step": 61150
+ },
+ {
+ "epoch": 0.31575525871809557,
+ "grad_norm": 20007.04296875,
+ "learning_rate": 8.664835751766371e-05,
+ "loss": 0.4544,
+ "step": 61200
+ },
+ {
+ "epoch": 0.31601322870070836,
+ "grad_norm": 23061.224609375,
+ "learning_rate": 8.662349759351542e-05,
+ "loss": 0.458,
+ "step": 61250
+ },
+ {
+ "epoch": 0.3162711986833212,
+ "grad_norm": 19895.3125,
+ "learning_rate": 8.65986181204533e-05,
+ "loss": 0.4555,
+ "step": 61300
+ },
+ {
+ "epoch": 0.31652916866593406,
+ "grad_norm": 22702.5234375,
+ "learning_rate": 8.65737191117575e-05,
+ "loss": 0.4586,
+ "step": 61350
+ },
+ {
+ "epoch": 0.31678713864854685,
+ "grad_norm": 20045.404296875,
+ "learning_rate": 8.654880058071866e-05,
+ "loss": 0.4583,
+ "step": 61400
+ },
+ {
+ "epoch": 0.3170451086311597,
+ "grad_norm": 21180.455078125,
+ "learning_rate": 8.652386254063778e-05,
+ "loss": 0.4594,
+ "step": 61450
+ },
+ {
+ "epoch": 0.3173030786137725,
+ "grad_norm": 19104.767578125,
+ "learning_rate": 8.649890500482633e-05,
+ "loss": 0.4532,
+ "step": 61500
+ },
+ {
+ "epoch": 0.31756104859638534,
+ "grad_norm": 23137.869140625,
+ "learning_rate": 8.647392798660613e-05,
+ "loss": 0.4535,
+ "step": 61550
+ },
+ {
+ "epoch": 0.31781901857899814,
+ "grad_norm": 21784.001953125,
+ "learning_rate": 8.644893149930949e-05,
+ "loss": 0.4518,
+ "step": 61600
+ },
+ {
+ "epoch": 0.318076988561611,
+ "grad_norm": 20489.796875,
+ "learning_rate": 8.642391555627897e-05,
+ "loss": 0.4572,
+ "step": 61650
+ },
+ {
+ "epoch": 0.3183349585442238,
+ "grad_norm": 21743.728515625,
+ "learning_rate": 8.639888017086764e-05,
+ "loss": 0.4601,
+ "step": 61700
+ },
+ {
+ "epoch": 0.3185929285268366,
+ "grad_norm": 21714.6171875,
+ "learning_rate": 8.63738253564389e-05,
+ "loss": 0.4597,
+ "step": 61750
+ },
+ {
+ "epoch": 0.3188508985094494,
+ "grad_norm": 19896.208984375,
+ "learning_rate": 8.634875112636653e-05,
+ "loss": 0.4532,
+ "step": 61800
+ },
+ {
+ "epoch": 0.31910886849206227,
+ "grad_norm": 22215.173828125,
+ "learning_rate": 8.632365749403465e-05,
+ "loss": 0.4532,
+ "step": 61850
+ },
+ {
+ "epoch": 0.31936683847467506,
+ "grad_norm": 22466.958984375,
+ "learning_rate": 8.629854447283778e-05,
+ "loss": 0.4539,
+ "step": 61900
+ },
+ {
+ "epoch": 0.3196248084572879,
+ "grad_norm": 21345.197265625,
+ "learning_rate": 8.627341207618073e-05,
+ "loss": 0.4551,
+ "step": 61950
+ },
+ {
+ "epoch": 0.3198827784399007,
+ "grad_norm": 20988.8203125,
+ "learning_rate": 8.624826031747872e-05,
+ "loss": 0.4593,
+ "step": 62000
+ },
+ {
+ "epoch": 0.32014074842251355,
+ "grad_norm": 23295.70703125,
+ "learning_rate": 8.622308921015726e-05,
+ "loss": 0.4547,
+ "step": 62050
+ },
+ {
+ "epoch": 0.3203987184051264,
+ "grad_norm": 22620.431640625,
+ "learning_rate": 8.619789876765221e-05,
+ "loss": 0.4601,
+ "step": 62100
+ },
+ {
+ "epoch": 0.3206566883877392,
+ "grad_norm": 21914.44140625,
+ "learning_rate": 8.61726890034097e-05,
+ "loss": 0.4474,
+ "step": 62150
+ },
+ {
+ "epoch": 0.32091465837035205,
+ "grad_norm": 20521.265625,
+ "learning_rate": 8.614745993088626e-05,
+ "loss": 0.4565,
+ "step": 62200
+ },
+ {
+ "epoch": 0.32117262835296484,
+ "grad_norm": 22810.072265625,
+ "learning_rate": 8.612221156354868e-05,
+ "loss": 0.453,
+ "step": 62250
+ },
+ {
+ "epoch": 0.3214305983355777,
+ "grad_norm": 20862.349609375,
+ "learning_rate": 8.609694391487402e-05,
+ "loss": 0.4543,
+ "step": 62300
+ },
+ {
+ "epoch": 0.3216885683181905,
+ "grad_norm": 22115.298828125,
+ "learning_rate": 8.607165699834967e-05,
+ "loss": 0.453,
+ "step": 62350
+ },
+ {
+ "epoch": 0.32194653830080333,
+ "grad_norm": 22504.859375,
+ "learning_rate": 8.60463508274733e-05,
+ "loss": 0.4552,
+ "step": 62400
+ },
+ {
+ "epoch": 0.3222045082834161,
+ "grad_norm": 21758.9453125,
+ "learning_rate": 8.602102541575286e-05,
+ "loss": 0.4526,
+ "step": 62450
+ },
+ {
+ "epoch": 0.322462478266029,
+ "grad_norm": 20388.23828125,
+ "learning_rate": 8.599568077670654e-05,
+ "loss": 0.4522,
+ "step": 62500
+ },
+ {
+ "epoch": 0.32272044824864177,
+ "grad_norm": 22393.857421875,
+ "learning_rate": 8.597031692386286e-05,
+ "loss": 0.4457,
+ "step": 62550
+ },
+ {
+ "epoch": 0.3229784182312546,
+ "grad_norm": 22233.978515625,
+ "learning_rate": 8.594493387076052e-05,
+ "loss": 0.449,
+ "step": 62600
+ },
+ {
+ "epoch": 0.3232363882138674,
+ "grad_norm": 19831.12109375,
+ "learning_rate": 8.591953163094852e-05,
+ "loss": 0.4556,
+ "step": 62650
+ },
+ {
+ "epoch": 0.32349435819648026,
+ "grad_norm": 19109.783203125,
+ "learning_rate": 8.589411021798608e-05,
+ "loss": 0.4552,
+ "step": 62700
+ },
+ {
+ "epoch": 0.3237523281790931,
+ "grad_norm": 23053.642578125,
+ "learning_rate": 8.586866964544265e-05,
+ "loss": 0.4552,
+ "step": 62750
+ },
+ {
+ "epoch": 0.3240102981617059,
+ "grad_norm": 17938.240234375,
+ "learning_rate": 8.584320992689791e-05,
+ "loss": 0.4512,
+ "step": 62800
+ },
+ {
+ "epoch": 0.32426826814431875,
+ "grad_norm": 19569.431640625,
+ "learning_rate": 8.581773107594179e-05,
+ "loss": 0.4557,
+ "step": 62850
+ },
+ {
+ "epoch": 0.32452623812693154,
+ "grad_norm": 19247.82421875,
+ "learning_rate": 8.579223310617439e-05,
+ "loss": 0.4599,
+ "step": 62900
+ },
+ {
+ "epoch": 0.3247842081095444,
+ "grad_norm": 21565.8671875,
+ "learning_rate": 8.576671603120603e-05,
+ "loss": 0.4573,
+ "step": 62950
+ },
+ {
+ "epoch": 0.3250421780921572,
+ "grad_norm": 19029.005859375,
+ "learning_rate": 8.574117986465723e-05,
+ "loss": 0.455,
+ "step": 63000
+ },
+ {
+ "epoch": 0.32530014807477003,
+ "grad_norm": 21574.626953125,
+ "learning_rate": 8.57156246201587e-05,
+ "loss": 0.4512,
+ "step": 63050
+ },
+ {
+ "epoch": 0.32555811805738283,
+ "grad_norm": 21181.8203125,
+ "learning_rate": 8.569005031135136e-05,
+ "loss": 0.4513,
+ "step": 63100
+ },
+ {
+ "epoch": 0.3258160880399957,
+ "grad_norm": 22689.93359375,
+ "learning_rate": 8.566445695188624e-05,
+ "loss": 0.4515,
+ "step": 63150
+ },
+ {
+ "epoch": 0.32607405802260847,
+ "grad_norm": 22001.9921875,
+ "learning_rate": 8.563884455542461e-05,
+ "loss": 0.4459,
+ "step": 63200
+ },
+ {
+ "epoch": 0.3263320280052213,
+ "grad_norm": 20342.96875,
+ "learning_rate": 8.561321313563786e-05,
+ "loss": 0.4526,
+ "step": 63250
+ },
+ {
+ "epoch": 0.3265899979878341,
+ "grad_norm": 20673.75390625,
+ "learning_rate": 8.558756270620756e-05,
+ "loss": 0.4581,
+ "step": 63300
+ },
+ {
+ "epoch": 0.32684796797044696,
+ "grad_norm": 23113.490234375,
+ "learning_rate": 8.556189328082538e-05,
+ "loss": 0.4525,
+ "step": 63350
+ },
+ {
+ "epoch": 0.32710593795305976,
+ "grad_norm": 21878.384765625,
+ "learning_rate": 8.55362048731932e-05,
+ "loss": 0.4536,
+ "step": 63400
+ },
+ {
+ "epoch": 0.3273639079356726,
+ "grad_norm": 22787.79296875,
+ "learning_rate": 8.551049749702297e-05,
+ "loss": 0.4586,
+ "step": 63450
+ },
+ {
+ "epoch": 0.32762187791828545,
+ "grad_norm": 20422.0625,
+ "learning_rate": 8.548477116603679e-05,
+ "loss": 0.4496,
+ "step": 63500
+ },
+ {
+ "epoch": 0.32787984790089825,
+ "grad_norm": 21936.8828125,
+ "learning_rate": 8.54590258939669e-05,
+ "loss": 0.4509,
+ "step": 63550
+ },
+ {
+ "epoch": 0.3281378178835111,
+ "grad_norm": 21049.275390625,
+ "learning_rate": 8.54332616945556e-05,
+ "loss": 0.4514,
+ "step": 63600
+ },
+ {
+ "epoch": 0.3283957878661239,
+ "grad_norm": 22976.1015625,
+ "learning_rate": 8.540747858155533e-05,
+ "loss": 0.4611,
+ "step": 63650
+ },
+ {
+ "epoch": 0.32865375784873674,
+ "grad_norm": 21968.18359375,
+ "learning_rate": 8.538167656872861e-05,
+ "loss": 0.4557,
+ "step": 63700
+ },
+ {
+ "epoch": 0.32891172783134953,
+ "grad_norm": 22231.755859375,
+ "learning_rate": 8.53558556698481e-05,
+ "loss": 0.4556,
+ "step": 63750
+ },
+ {
+ "epoch": 0.3291696978139624,
+ "grad_norm": 21183.978515625,
+ "learning_rate": 8.533001589869643e-05,
+ "loss": 0.4479,
+ "step": 63800
+ },
+ {
+ "epoch": 0.3294276677965752,
+ "grad_norm": 23931.5234375,
+ "learning_rate": 8.530415726906642e-05,
+ "loss": 0.4533,
+ "step": 63850
+ },
+ {
+ "epoch": 0.329685637779188,
+ "grad_norm": 21073.62890625,
+ "learning_rate": 8.527827979476087e-05,
+ "loss": 0.4577,
+ "step": 63900
+ },
+ {
+ "epoch": 0.3299436077618008,
+ "grad_norm": 19957.09375,
+ "learning_rate": 8.525238348959268e-05,
+ "loss": 0.4486,
+ "step": 63950
+ },
+ {
+ "epoch": 0.33020157774441367,
+ "grad_norm": 18999.962890625,
+ "learning_rate": 8.522646836738482e-05,
+ "loss": 0.4525,
+ "step": 64000
+ },
+ {
+ "epoch": 0.33045954772702646,
+ "grad_norm": 24102.1640625,
+ "learning_rate": 8.520053444197026e-05,
+ "loss": 0.4545,
+ "step": 64050
+ },
+ {
+ "epoch": 0.3307175177096393,
+ "grad_norm": 20205.65234375,
+ "learning_rate": 8.517458172719203e-05,
+ "loss": 0.4539,
+ "step": 64100
+ },
+ {
+ "epoch": 0.33097548769225216,
+ "grad_norm": 24099.8203125,
+ "learning_rate": 8.514861023690321e-05,
+ "loss": 0.4465,
+ "step": 64150
+ },
+ {
+ "epoch": 0.33123345767486495,
+ "grad_norm": 19802.203125,
+ "learning_rate": 8.512261998496685e-05,
+ "loss": 0.4546,
+ "step": 64200
+ },
+ {
+ "epoch": 0.3314914276574778,
+ "grad_norm": 23137.609375,
+ "learning_rate": 8.509661098525603e-05,
+ "loss": 0.4539,
+ "step": 64250
+ },
+ {
+ "epoch": 0.3317493976400906,
+ "grad_norm": 23578.609375,
+ "learning_rate": 8.507058325165391e-05,
+ "loss": 0.4513,
+ "step": 64300
+ },
+ {
+ "epoch": 0.33200736762270344,
+ "grad_norm": 19172.0859375,
+ "learning_rate": 8.504453679805353e-05,
+ "loss": 0.456,
+ "step": 64350
+ },
+ {
+ "epoch": 0.33226533760531624,
+ "grad_norm": 19165.775390625,
+ "learning_rate": 8.5018471638358e-05,
+ "loss": 0.4578,
+ "step": 64400
+ },
+ {
+ "epoch": 0.3325233075879291,
+ "grad_norm": 18070.72265625,
+ "learning_rate": 8.49923877864804e-05,
+ "loss": 0.4608,
+ "step": 64450
+ },
+ {
+ "epoch": 0.3327812775705419,
+ "grad_norm": 20918.525390625,
+ "learning_rate": 8.49662852563438e-05,
+ "loss": 0.4526,
+ "step": 64500
+ },
+ {
+ "epoch": 0.3330392475531547,
+ "grad_norm": 21165.05078125,
+ "learning_rate": 8.494016406188121e-05,
+ "loss": 0.4503,
+ "step": 64550
+ },
+ {
+ "epoch": 0.3332972175357675,
+ "grad_norm": 19273.013671875,
+ "learning_rate": 8.491402421703562e-05,
+ "loss": 0.4572,
+ "step": 64600
+ },
+ {
+ "epoch": 0.33355518751838037,
+ "grad_norm": 21221.681640625,
+ "learning_rate": 8.488786573575998e-05,
+ "loss": 0.456,
+ "step": 64650
+ },
+ {
+ "epoch": 0.33381315750099316,
+ "grad_norm": 19485.8125,
+ "learning_rate": 8.486168863201716e-05,
+ "loss": 0.4423,
+ "step": 64700
+ },
+ {
+ "epoch": 0.334071127483606,
+ "grad_norm": 23241.580078125,
+ "learning_rate": 8.483549291978001e-05,
+ "loss": 0.4531,
+ "step": 64750
+ },
+ {
+ "epoch": 0.3343290974662188,
+ "grad_norm": 21281.111328125,
+ "learning_rate": 8.48092786130313e-05,
+ "loss": 0.452,
+ "step": 64800
+ },
+ {
+ "epoch": 0.33458706744883165,
+ "grad_norm": 21610.2578125,
+ "learning_rate": 8.47830457257637e-05,
+ "loss": 0.4488,
+ "step": 64850
+ },
+ {
+ "epoch": 0.3348450374314445,
+ "grad_norm": 19343.466796875,
+ "learning_rate": 8.475679427197982e-05,
+ "loss": 0.4514,
+ "step": 64900
+ },
+ {
+ "epoch": 0.3351030074140573,
+ "grad_norm": 19489.1875,
+ "learning_rate": 8.473052426569219e-05,
+ "loss": 0.447,
+ "step": 64950
+ },
+ {
+ "epoch": 0.33536097739667015,
+ "grad_norm": 24805.84765625,
+ "learning_rate": 8.470423572092323e-05,
+ "loss": 0.4594,
+ "step": 65000
+ },
+ {
+ "epoch": 0.33536097739667015,
+ "eval_loss": 0.440469890832901,
+ "eval_runtime": 3318.76,
+ "eval_samples_per_second": 934.421,
+ "eval_steps_per_second": 1.825,
+ "step": 65000
+ },
+ {
+ "epoch": 0.33561894737928294,
+ "grad_norm": 22912.732421875,
+ "learning_rate": 8.467792865170525e-05,
+ "loss": 0.4435,
+ "step": 65050
+ },
+ {
+ "epoch": 0.3358769173618958,
+ "grad_norm": 19958.994140625,
+ "learning_rate": 8.465160307208045e-05,
+ "loss": 0.4588,
+ "step": 65100
+ },
+ {
+ "epoch": 0.3361348873445086,
+ "grad_norm": 20914.193359375,
+ "learning_rate": 8.462525899610092e-05,
+ "loss": 0.4497,
+ "step": 65150
+ },
+ {
+ "epoch": 0.33639285732712143,
+ "grad_norm": 20505.814453125,
+ "learning_rate": 8.459889643782861e-05,
+ "loss": 0.4569,
+ "step": 65200
+ },
+ {
+ "epoch": 0.3366508273097342,
+ "grad_norm": 19486.068359375,
+ "learning_rate": 8.457251541133535e-05,
+ "loss": 0.4505,
+ "step": 65250
+ },
+ {
+ "epoch": 0.3369087972923471,
+ "grad_norm": 21967.84765625,
+ "learning_rate": 8.454611593070284e-05,
+ "loss": 0.4556,
+ "step": 65300
+ },
+ {
+ "epoch": 0.33716676727495987,
+ "grad_norm": 21949.767578125,
+ "learning_rate": 8.451969801002258e-05,
+ "loss": 0.4491,
+ "step": 65350
+ },
+ {
+ "epoch": 0.3374247372575727,
+ "grad_norm": 19765.14453125,
+ "learning_rate": 8.449326166339595e-05,
+ "loss": 0.4507,
+ "step": 65400
+ },
+ {
+ "epoch": 0.3376827072401855,
+ "grad_norm": 21396.982421875,
+ "learning_rate": 8.446680690493417e-05,
+ "loss": 0.4548,
+ "step": 65450
+ },
+ {
+ "epoch": 0.33794067722279836,
+ "grad_norm": 22511.8359375,
+ "learning_rate": 8.444033374875828e-05,
+ "loss": 0.454,
+ "step": 65500
+ },
+ {
+ "epoch": 0.33819864720541115,
+ "grad_norm": 21264.076171875,
+ "learning_rate": 8.441384220899912e-05,
+ "loss": 0.4486,
+ "step": 65550
+ },
+ {
+ "epoch": 0.338456617188024,
+ "grad_norm": 20736.046875,
+ "learning_rate": 8.438733229979741e-05,
+ "loss": 0.4505,
+ "step": 65600
+ },
+ {
+ "epoch": 0.33871458717063685,
+ "grad_norm": 20183.8359375,
+ "learning_rate": 8.436080403530356e-05,
+ "loss": 0.4485,
+ "step": 65650
+ },
+ {
+ "epoch": 0.33897255715324964,
+ "grad_norm": 21947.3671875,
+ "learning_rate": 8.433425742967787e-05,
+ "loss": 0.4499,
+ "step": 65700
+ },
+ {
+ "epoch": 0.3392305271358625,
+ "grad_norm": 22621.236328125,
+ "learning_rate": 8.430769249709042e-05,
+ "loss": 0.4503,
+ "step": 65750
+ },
+ {
+ "epoch": 0.3394884971184753,
+ "grad_norm": 21537.947265625,
+ "learning_rate": 8.428110925172103e-05,
+ "loss": 0.4634,
+ "step": 65800
+ },
+ {
+ "epoch": 0.33974646710108813,
+ "grad_norm": 20869.759765625,
+ "learning_rate": 8.425450770775936e-05,
+ "loss": 0.4504,
+ "step": 65850
+ },
+ {
+ "epoch": 0.34000443708370093,
+ "grad_norm": 20865.12109375,
+ "learning_rate": 8.422788787940477e-05,
+ "loss": 0.4509,
+ "step": 65900
+ },
+ {
+ "epoch": 0.3402624070663138,
+ "grad_norm": 23897.974609375,
+ "learning_rate": 8.42012497808664e-05,
+ "loss": 0.4512,
+ "step": 65950
+ },
+ {
+ "epoch": 0.34052037704892657,
+ "grad_norm": 23978.56640625,
+ "learning_rate": 8.417459342636318e-05,
+ "loss": 0.4513,
+ "step": 66000
+ },
+ {
+ "epoch": 0.3407783470315394,
+ "grad_norm": 22806.99609375,
+ "learning_rate": 8.414791883012374e-05,
+ "loss": 0.4468,
+ "step": 66050
+ },
+ {
+ "epoch": 0.3410363170141522,
+ "grad_norm": 20348.841796875,
+ "learning_rate": 8.412122600638646e-05,
+ "loss": 0.4484,
+ "step": 66100
+ },
+ {
+ "epoch": 0.34129428699676506,
+ "grad_norm": 21868.353515625,
+ "learning_rate": 8.409451496939945e-05,
+ "loss": 0.4601,
+ "step": 66150
+ },
+ {
+ "epoch": 0.34155225697937786,
+ "grad_norm": 20312.36328125,
+ "learning_rate": 8.406778573342055e-05,
+ "loss": 0.4485,
+ "step": 66200
+ },
+ {
+ "epoch": 0.3418102269619907,
+ "grad_norm": 25603.419921875,
+ "learning_rate": 8.404103831271733e-05,
+ "loss": 0.4487,
+ "step": 66250
+ },
+ {
+ "epoch": 0.34206819694460355,
+ "grad_norm": 21330.416015625,
+ "learning_rate": 8.4014272721567e-05,
+ "loss": 0.449,
+ "step": 66300
+ },
+ {
+ "epoch": 0.34232616692721635,
+ "grad_norm": 20045.4453125,
+ "learning_rate": 8.398748897425656e-05,
+ "loss": 0.447,
+ "step": 66350
+ },
+ {
+ "epoch": 0.3425841369098292,
+ "grad_norm": 21575.642578125,
+ "learning_rate": 8.396068708508262e-05,
+ "loss": 0.4495,
+ "step": 66400
+ },
+ {
+ "epoch": 0.342842106892442,
+ "grad_norm": 20396.5390625,
+ "learning_rate": 8.393386706835154e-05,
+ "loss": 0.4478,
+ "step": 66450
+ },
+ {
+ "epoch": 0.34310007687505484,
+ "grad_norm": 20366.8046875,
+ "learning_rate": 8.390702893837929e-05,
+ "loss": 0.4531,
+ "step": 66500
+ },
+ {
+ "epoch": 0.34335804685766763,
+ "grad_norm": 23514.521484375,
+ "learning_rate": 8.388017270949158e-05,
+ "loss": 0.4496,
+ "step": 66550
+ },
+ {
+ "epoch": 0.3436160168402805,
+ "grad_norm": 23656.869140625,
+ "learning_rate": 8.385329839602372e-05,
+ "loss": 0.448,
+ "step": 66600
+ },
+ {
+ "epoch": 0.3438739868228933,
+ "grad_norm": 23712.216796875,
+ "learning_rate": 8.382640601232071e-05,
+ "loss": 0.4502,
+ "step": 66650
+ },
+ {
+ "epoch": 0.3441319568055061,
+ "grad_norm": 23220.240234375,
+ "learning_rate": 8.379949557273717e-05,
+ "loss": 0.4469,
+ "step": 66700
+ },
+ {
+ "epoch": 0.3443899267881189,
+ "grad_norm": 21469.244140625,
+ "learning_rate": 8.37725670916374e-05,
+ "loss": 0.4506,
+ "step": 66750
+ },
+ {
+ "epoch": 0.34464789677073177,
+ "grad_norm": 19195.431640625,
+ "learning_rate": 8.374562058339528e-05,
+ "loss": 0.4494,
+ "step": 66800
+ },
+ {
+ "epoch": 0.34490586675334456,
+ "grad_norm": 21464.130859375,
+ "learning_rate": 8.371865606239433e-05,
+ "loss": 0.4552,
+ "step": 66850
+ },
+ {
+ "epoch": 0.3451638367359574,
+ "grad_norm": 23449.76953125,
+ "learning_rate": 8.36916735430277e-05,
+ "loss": 0.4513,
+ "step": 66900
+ },
+ {
+ "epoch": 0.3454218067185702,
+ "grad_norm": 20593.39453125,
+ "learning_rate": 8.366467303969814e-05,
+ "loss": 0.447,
+ "step": 66950
+ },
+ {
+ "epoch": 0.34567977670118305,
+ "grad_norm": 21341.72265625,
+ "learning_rate": 8.3637654566818e-05,
+ "loss": 0.4448,
+ "step": 67000
+ },
+ {
+ "epoch": 0.3459377466837959,
+ "grad_norm": 20746.919921875,
+ "learning_rate": 8.361061813880919e-05,
+ "loss": 0.4511,
+ "step": 67050
+ },
+ {
+ "epoch": 0.3461957166664087,
+ "grad_norm": 19786.162109375,
+ "learning_rate": 8.358356377010325e-05,
+ "loss": 0.452,
+ "step": 67100
+ },
+ {
+ "epoch": 0.34645368664902154,
+ "grad_norm": 20875.25,
+ "learning_rate": 8.355649147514128e-05,
+ "loss": 0.4491,
+ "step": 67150
+ },
+ {
+ "epoch": 0.34671165663163434,
+ "grad_norm": 22833.728515625,
+ "learning_rate": 8.352940126837394e-05,
+ "loss": 0.4545,
+ "step": 67200
+ },
+ {
+ "epoch": 0.3469696266142472,
+ "grad_norm": 21289.896484375,
+ "learning_rate": 8.350229316426146e-05,
+ "loss": 0.4451,
+ "step": 67250
+ },
+ {
+ "epoch": 0.34722759659686,
+ "grad_norm": 23276.080078125,
+ "learning_rate": 8.347516717727363e-05,
+ "loss": 0.4468,
+ "step": 67300
+ },
+ {
+ "epoch": 0.3474855665794728,
+ "grad_norm": 22568.234375,
+ "learning_rate": 8.344802332188977e-05,
+ "loss": 0.4455,
+ "step": 67350
+ },
+ {
+ "epoch": 0.3477435365620856,
+ "grad_norm": 19527.234375,
+ "learning_rate": 8.342086161259874e-05,
+ "loss": 0.4511,
+ "step": 67400
+ },
+ {
+ "epoch": 0.34800150654469847,
+ "grad_norm": 21764.56640625,
+ "learning_rate": 8.339368206389895e-05,
+ "loss": 0.4481,
+ "step": 67450
+ },
+ {
+ "epoch": 0.34825947652731126,
+ "grad_norm": 21142.33984375,
+ "learning_rate": 8.336648469029829e-05,
+ "loss": 0.4539,
+ "step": 67500
+ },
+ {
+ "epoch": 0.3485174465099241,
+ "grad_norm": 21612.60546875,
+ "learning_rate": 8.333926950631421e-05,
+ "loss": 0.4497,
+ "step": 67550
+ },
+ {
+ "epoch": 0.3487754164925369,
+ "grad_norm": 20772.0390625,
+ "learning_rate": 8.331203652647364e-05,
+ "loss": 0.458,
+ "step": 67600
+ },
+ {
+ "epoch": 0.34903338647514975,
+ "grad_norm": 22197.166015625,
+ "learning_rate": 8.328478576531303e-05,
+ "loss": 0.4499,
+ "step": 67650
+ },
+ {
+ "epoch": 0.34929135645776255,
+ "grad_norm": 20853.865234375,
+ "learning_rate": 8.32575172373783e-05,
+ "loss": 0.4473,
+ "step": 67700
+ },
+ {
+ "epoch": 0.3495493264403754,
+ "grad_norm": 19692.892578125,
+ "learning_rate": 8.323023095722486e-05,
+ "loss": 0.4516,
+ "step": 67750
+ },
+ {
+ "epoch": 0.34980729642298825,
+ "grad_norm": 22032.115234375,
+ "learning_rate": 8.32029269394176e-05,
+ "loss": 0.4452,
+ "step": 67800
+ },
+ {
+ "epoch": 0.35006526640560104,
+ "grad_norm": 23928.783203125,
+ "learning_rate": 8.317560519853089e-05,
+ "loss": 0.4489,
+ "step": 67850
+ },
+ {
+ "epoch": 0.3503232363882139,
+ "grad_norm": 20832.560546875,
+ "learning_rate": 8.314826574914853e-05,
+ "loss": 0.4493,
+ "step": 67900
+ },
+ {
+ "epoch": 0.3505812063708267,
+ "grad_norm": 23453.634765625,
+ "learning_rate": 8.31209086058638e-05,
+ "loss": 0.4487,
+ "step": 67950
+ },
+ {
+ "epoch": 0.35083917635343953,
+ "grad_norm": 23585.826171875,
+ "learning_rate": 8.309353378327938e-05,
+ "loss": 0.4473,
+ "step": 68000
+ },
+ {
+ "epoch": 0.3510971463360523,
+ "grad_norm": 21680.953125,
+ "learning_rate": 8.306614129600745e-05,
+ "loss": 0.4494,
+ "step": 68050
+ },
+ {
+ "epoch": 0.3513551163186652,
+ "grad_norm": 19228.56640625,
+ "learning_rate": 8.303873115866958e-05,
+ "loss": 0.4483,
+ "step": 68100
+ },
+ {
+ "epoch": 0.35161308630127797,
+ "grad_norm": 22056.6328125,
+ "learning_rate": 8.301130338589679e-05,
+ "loss": 0.4485,
+ "step": 68150
+ },
+ {
+ "epoch": 0.3518710562838908,
+ "grad_norm": 22030.484375,
+ "learning_rate": 8.298385799232947e-05,
+ "loss": 0.4462,
+ "step": 68200
+ },
+ {
+ "epoch": 0.3521290262665036,
+ "grad_norm": 19658.33984375,
+ "learning_rate": 8.295639499261745e-05,
+ "loss": 0.4444,
+ "step": 68250
+ },
+ {
+ "epoch": 0.35238699624911646,
+ "grad_norm": 19667.8125,
+ "learning_rate": 8.292891440141997e-05,
+ "loss": 0.4482,
+ "step": 68300
+ },
+ {
+ "epoch": 0.35264496623172925,
+ "grad_norm": 20248.193359375,
+ "learning_rate": 8.290141623340558e-05,
+ "loss": 0.454,
+ "step": 68350
+ },
+ {
+ "epoch": 0.3529029362143421,
+ "grad_norm": 21358.89453125,
+ "learning_rate": 8.287390050325232e-05,
+ "loss": 0.4485,
+ "step": 68400
+ },
+ {
+ "epoch": 0.35316090619695495,
+ "grad_norm": 19209.328125,
+ "learning_rate": 8.284636722564754e-05,
+ "loss": 0.4505,
+ "step": 68450
+ },
+ {
+ "epoch": 0.35341887617956774,
+ "grad_norm": 21890.7109375,
+ "learning_rate": 8.281881641528795e-05,
+ "loss": 0.4531,
+ "step": 68500
+ },
+ {
+ "epoch": 0.3536768461621806,
+ "grad_norm": 20904.052734375,
+ "learning_rate": 8.279124808687967e-05,
+ "loss": 0.4494,
+ "step": 68550
+ },
+ {
+ "epoch": 0.3539348161447934,
+ "grad_norm": 22519.888671875,
+ "learning_rate": 8.276366225513812e-05,
+ "loss": 0.4422,
+ "step": 68600
+ },
+ {
+ "epoch": 0.35419278612740623,
+ "grad_norm": 20027.009765625,
+ "learning_rate": 8.27360589347881e-05,
+ "loss": 0.4484,
+ "step": 68650
+ },
+ {
+ "epoch": 0.354450756110019,
+ "grad_norm": 22069.64453125,
+ "learning_rate": 8.27084381405637e-05,
+ "loss": 0.443,
+ "step": 68700
+ },
+ {
+ "epoch": 0.3547087260926319,
+ "grad_norm": 23096.74609375,
+ "learning_rate": 8.26807998872084e-05,
+ "loss": 0.4437,
+ "step": 68750
+ },
+ {
+ "epoch": 0.35496669607524467,
+ "grad_norm": 19204.626953125,
+ "learning_rate": 8.265314418947494e-05,
+ "loss": 0.4496,
+ "step": 68800
+ },
+ {
+ "epoch": 0.3552246660578575,
+ "grad_norm": 26871.888671875,
+ "learning_rate": 8.262547106212541e-05,
+ "loss": 0.446,
+ "step": 68850
+ },
+ {
+ "epoch": 0.3554826360404703,
+ "grad_norm": 21342.556640625,
+ "learning_rate": 8.259778051993118e-05,
+ "loss": 0.4525,
+ "step": 68900
+ },
+ {
+ "epoch": 0.35574060602308316,
+ "grad_norm": 23054.814453125,
+ "learning_rate": 8.25700725776729e-05,
+ "loss": 0.4427,
+ "step": 68950
+ },
+ {
+ "epoch": 0.35599857600569595,
+ "grad_norm": 20473.818359375,
+ "learning_rate": 8.254234725014061e-05,
+ "loss": 0.4452,
+ "step": 69000
+ },
+ {
+ "epoch": 0.3562565459883088,
+ "grad_norm": 22081.576171875,
+ "learning_rate": 8.251460455213347e-05,
+ "loss": 0.4533,
+ "step": 69050
+ },
+ {
+ "epoch": 0.3565145159709216,
+ "grad_norm": 21840.048828125,
+ "learning_rate": 8.248684449846004e-05,
+ "loss": 0.4503,
+ "step": 69100
+ },
+ {
+ "epoch": 0.35677248595353445,
+ "grad_norm": 21595.234375,
+ "learning_rate": 8.245906710393808e-05,
+ "loss": 0.4459,
+ "step": 69150
+ },
+ {
+ "epoch": 0.3570304559361473,
+ "grad_norm": 22540.302734375,
+ "learning_rate": 8.243127238339463e-05,
+ "loss": 0.4461,
+ "step": 69200
+ },
+ {
+ "epoch": 0.3572884259187601,
+ "grad_norm": 20646.5859375,
+ "learning_rate": 8.2403460351666e-05,
+ "loss": 0.4522,
+ "step": 69250
+ },
+ {
+ "epoch": 0.35754639590137294,
+ "grad_norm": 20219.978515625,
+ "learning_rate": 8.237563102359767e-05,
+ "loss": 0.4464,
+ "step": 69300
+ },
+ {
+ "epoch": 0.35780436588398573,
+ "grad_norm": 21399.888671875,
+ "learning_rate": 8.234778441404441e-05,
+ "loss": 0.451,
+ "step": 69350
+ },
+ {
+ "epoch": 0.3580623358665986,
+ "grad_norm": 23263.193359375,
+ "learning_rate": 8.231992053787024e-05,
+ "loss": 0.4491,
+ "step": 69400
+ },
+ {
+ "epoch": 0.3583203058492114,
+ "grad_norm": 20740.455078125,
+ "learning_rate": 8.229203940994829e-05,
+ "loss": 0.4456,
+ "step": 69450
+ },
+ {
+ "epoch": 0.3585782758318242,
+ "grad_norm": 21715.078125,
+ "learning_rate": 8.226414104516102e-05,
+ "loss": 0.4467,
+ "step": 69500
+ },
+ {
+ "epoch": 0.358836245814437,
+ "grad_norm": 19771.517578125,
+ "learning_rate": 8.223622545840001e-05,
+ "loss": 0.4505,
+ "step": 69550
+ },
+ {
+ "epoch": 0.35909421579704986,
+ "grad_norm": 20944.298828125,
+ "learning_rate": 8.220829266456608e-05,
+ "loss": 0.4481,
+ "step": 69600
+ },
+ {
+ "epoch": 0.35935218577966266,
+ "grad_norm": 22313.017578125,
+ "learning_rate": 8.21803426785692e-05,
+ "loss": 0.4503,
+ "step": 69650
+ },
+ {
+ "epoch": 0.3596101557622755,
+ "grad_norm": 22525.5859375,
+ "learning_rate": 8.215237551532853e-05,
+ "loss": 0.4488,
+ "step": 69700
+ },
+ {
+ "epoch": 0.3598681257448883,
+ "grad_norm": 22731.85546875,
+ "learning_rate": 8.21243911897724e-05,
+ "loss": 0.4476,
+ "step": 69750
+ },
+ {
+ "epoch": 0.36012609572750115,
+ "grad_norm": 20872.9375,
+ "learning_rate": 8.20963897168383e-05,
+ "loss": 0.4485,
+ "step": 69800
+ },
+ {
+ "epoch": 0.360384065710114,
+ "grad_norm": 21066.095703125,
+ "learning_rate": 8.206837111147289e-05,
+ "loss": 0.4511,
+ "step": 69850
+ },
+ {
+ "epoch": 0.3606420356927268,
+ "grad_norm": 21823.62890625,
+ "learning_rate": 8.204033538863197e-05,
+ "loss": 0.4415,
+ "step": 69900
+ },
+ {
+ "epoch": 0.36090000567533964,
+ "grad_norm": 19639.724609375,
+ "learning_rate": 8.201228256328042e-05,
+ "loss": 0.4456,
+ "step": 69950
+ },
+ {
+ "epoch": 0.36115797565795243,
+ "grad_norm": 25321.20703125,
+ "learning_rate": 8.198421265039231e-05,
+ "loss": 0.4506,
+ "step": 70000
+ },
+ {
+ "epoch": 0.36115797565795243,
+ "eval_loss": 0.43597322702407837,
+ "eval_runtime": 3285.9769,
+ "eval_samples_per_second": 943.744,
+ "eval_steps_per_second": 1.843,
+ "step": 70000
+ },
+ {
+ "epoch": 0.3614159456405653,
+ "grad_norm": 19558.943359375,
+ "learning_rate": 8.195612566495084e-05,
+ "loss": 0.4502,
+ "step": 70050
+ },
+ {
+ "epoch": 0.3616739156231781,
+ "grad_norm": 21766.482421875,
+ "learning_rate": 8.192802162194828e-05,
+ "loss": 0.4444,
+ "step": 70100
+ },
+ {
+ "epoch": 0.3619318856057909,
+ "grad_norm": 23117.017578125,
+ "learning_rate": 8.189990053638603e-05,
+ "loss": 0.4476,
+ "step": 70150
+ },
+ {
+ "epoch": 0.3621898555884037,
+ "grad_norm": 19175.60546875,
+ "learning_rate": 8.18717624232746e-05,
+ "loss": 0.4479,
+ "step": 70200
+ },
+ {
+ "epoch": 0.36244782557101657,
+ "grad_norm": 22124.80078125,
+ "learning_rate": 8.184360729763351e-05,
+ "loss": 0.449,
+ "step": 70250
+ },
+ {
+ "epoch": 0.36270579555362936,
+ "grad_norm": 21717.501953125,
+ "learning_rate": 8.181543517449147e-05,
+ "loss": 0.4488,
+ "step": 70300
+ },
+ {
+ "epoch": 0.3629637655362422,
+ "grad_norm": 20235.162109375,
+ "learning_rate": 8.178724606888621e-05,
+ "loss": 0.4496,
+ "step": 70350
+ },
+ {
+ "epoch": 0.363221735518855,
+ "grad_norm": 22513.677734375,
+ "learning_rate": 8.175903999586455e-05,
+ "loss": 0.4463,
+ "step": 70400
+ },
+ {
+ "epoch": 0.36347970550146785,
+ "grad_norm": 21388.1953125,
+ "learning_rate": 8.173081697048228e-05,
+ "loss": 0.4446,
+ "step": 70450
+ },
+ {
+ "epoch": 0.36373767548408065,
+ "grad_norm": 20549.271484375,
+ "learning_rate": 8.170257700780435e-05,
+ "loss": 0.4421,
+ "step": 70500
+ },
+ {
+ "epoch": 0.3639956454666935,
+ "grad_norm": 21219.158203125,
+ "learning_rate": 8.16743201229047e-05,
+ "loss": 0.4472,
+ "step": 70550
+ },
+ {
+ "epoch": 0.36425361544930634,
+ "grad_norm": 20570.34375,
+ "learning_rate": 8.164604633086632e-05,
+ "loss": 0.4487,
+ "step": 70600
+ },
+ {
+ "epoch": 0.36451158543191914,
+ "grad_norm": 17376.671875,
+ "learning_rate": 8.161775564678118e-05,
+ "loss": 0.4413,
+ "step": 70650
+ },
+ {
+ "epoch": 0.364769555414532,
+ "grad_norm": 21676.33984375,
+ "learning_rate": 8.158944808575032e-05,
+ "loss": 0.4433,
+ "step": 70700
+ },
+ {
+ "epoch": 0.3650275253971448,
+ "grad_norm": 21901.001953125,
+ "learning_rate": 8.156112366288378e-05,
+ "loss": 0.4465,
+ "step": 70750
+ },
+ {
+ "epoch": 0.36528549537975763,
+ "grad_norm": 20330.720703125,
+ "learning_rate": 8.153278239330056e-05,
+ "loss": 0.4456,
+ "step": 70800
+ },
+ {
+ "epoch": 0.3655434653623704,
+ "grad_norm": 22179.904296875,
+ "learning_rate": 8.15044242921287e-05,
+ "loss": 0.4465,
+ "step": 70850
+ },
+ {
+ "epoch": 0.3658014353449833,
+ "grad_norm": 21384.66015625,
+ "learning_rate": 8.14760493745052e-05,
+ "loss": 0.4476,
+ "step": 70900
+ },
+ {
+ "epoch": 0.36605940532759607,
+ "grad_norm": 21706.103515625,
+ "learning_rate": 8.144765765557604e-05,
+ "loss": 0.4475,
+ "step": 70950
+ },
+ {
+ "epoch": 0.3663173753102089,
+ "grad_norm": 20332.5,
+ "learning_rate": 8.141924915049617e-05,
+ "loss": 0.449,
+ "step": 71000
+ },
+ {
+ "epoch": 0.3665753452928217,
+ "grad_norm": 22648.640625,
+ "learning_rate": 8.139082387442951e-05,
+ "loss": 0.4566,
+ "step": 71050
+ },
+ {
+ "epoch": 0.36683331527543456,
+ "grad_norm": 21496.291015625,
+ "learning_rate": 8.136238184254892e-05,
+ "loss": 0.4493,
+ "step": 71100
+ },
+ {
+ "epoch": 0.36709128525804735,
+ "grad_norm": 22114.169921875,
+ "learning_rate": 8.133392307003618e-05,
+ "loss": 0.4441,
+ "step": 71150
+ },
+ {
+ "epoch": 0.3673492552406602,
+ "grad_norm": 22476.390625,
+ "learning_rate": 8.130544757208205e-05,
+ "loss": 0.4391,
+ "step": 71200
+ },
+ {
+ "epoch": 0.367607225223273,
+ "grad_norm": 22175.044921875,
+ "learning_rate": 8.127695536388623e-05,
+ "loss": 0.4439,
+ "step": 71250
+ },
+ {
+ "epoch": 0.36786519520588584,
+ "grad_norm": 19715.728515625,
+ "learning_rate": 8.124844646065724e-05,
+ "loss": 0.448,
+ "step": 71300
+ },
+ {
+ "epoch": 0.3681231651884987,
+ "grad_norm": 19609.146484375,
+ "learning_rate": 8.121992087761266e-05,
+ "loss": 0.4476,
+ "step": 71350
+ },
+ {
+ "epoch": 0.3683811351711115,
+ "grad_norm": 21872.12890625,
+ "learning_rate": 8.119137862997883e-05,
+ "loss": 0.4536,
+ "step": 71400
+ },
+ {
+ "epoch": 0.36863910515372433,
+ "grad_norm": 19710.619140625,
+ "learning_rate": 8.116281973299107e-05,
+ "loss": 0.4466,
+ "step": 71450
+ },
+ {
+ "epoch": 0.3688970751363371,
+ "grad_norm": 21783.138671875,
+ "learning_rate": 8.113424420189357e-05,
+ "loss": 0.4422,
+ "step": 71500
+ },
+ {
+ "epoch": 0.36915504511895,
+ "grad_norm": 20527.984375,
+ "learning_rate": 8.110565205193941e-05,
+ "loss": 0.4499,
+ "step": 71550
+ },
+ {
+ "epoch": 0.36941301510156277,
+ "grad_norm": 21693.171875,
+ "learning_rate": 8.10770432983905e-05,
+ "loss": 0.4465,
+ "step": 71600
+ },
+ {
+ "epoch": 0.3696709850841756,
+ "grad_norm": 19817.142578125,
+ "learning_rate": 8.104841795651765e-05,
+ "loss": 0.4471,
+ "step": 71650
+ },
+ {
+ "epoch": 0.3699289550667884,
+ "grad_norm": 20883.767578125,
+ "learning_rate": 8.101977604160052e-05,
+ "loss": 0.4507,
+ "step": 71700
+ },
+ {
+ "epoch": 0.37018692504940126,
+ "grad_norm": 21206.943359375,
+ "learning_rate": 8.099111756892759e-05,
+ "loss": 0.4415,
+ "step": 71750
+ },
+ {
+ "epoch": 0.37044489503201405,
+ "grad_norm": 21431.19140625,
+ "learning_rate": 8.096244255379621e-05,
+ "loss": 0.4542,
+ "step": 71800
+ },
+ {
+ "epoch": 0.3707028650146269,
+ "grad_norm": 23020.34375,
+ "learning_rate": 8.093375101151255e-05,
+ "loss": 0.4481,
+ "step": 71850
+ },
+ {
+ "epoch": 0.3709608349972397,
+ "grad_norm": 20704.1171875,
+ "learning_rate": 8.09050429573916e-05,
+ "loss": 0.4427,
+ "step": 71900
+ },
+ {
+ "epoch": 0.37121880497985255,
+ "grad_norm": 20195.037109375,
+ "learning_rate": 8.087631840675715e-05,
+ "loss": 0.4416,
+ "step": 71950
+ },
+ {
+ "epoch": 0.3714767749624654,
+ "grad_norm": 21187.99609375,
+ "learning_rate": 8.084757737494184e-05,
+ "loss": 0.452,
+ "step": 72000
+ },
+ {
+ "epoch": 0.3717347449450782,
+ "grad_norm": 20694.912109375,
+ "learning_rate": 8.081881987728703e-05,
+ "loss": 0.4416,
+ "step": 72050
+ },
+ {
+ "epoch": 0.37199271492769104,
+ "grad_norm": 23006.939453125,
+ "learning_rate": 8.079004592914297e-05,
+ "loss": 0.4426,
+ "step": 72100
+ },
+ {
+ "epoch": 0.37225068491030383,
+ "grad_norm": 21854.025390625,
+ "learning_rate": 8.076125554586859e-05,
+ "loss": 0.4453,
+ "step": 72150
+ },
+ {
+ "epoch": 0.3725086548929167,
+ "grad_norm": 19155.400390625,
+ "learning_rate": 8.073244874283166e-05,
+ "loss": 0.4539,
+ "step": 72200
+ },
+ {
+ "epoch": 0.3727666248755295,
+ "grad_norm": 22085.5625,
+ "learning_rate": 8.070362553540869e-05,
+ "loss": 0.4474,
+ "step": 72250
+ },
+ {
+ "epoch": 0.3730245948581423,
+ "grad_norm": 21225.626953125,
+ "learning_rate": 8.067478593898495e-05,
+ "loss": 0.4431,
+ "step": 72300
+ },
+ {
+ "epoch": 0.3732825648407551,
+ "grad_norm": 21605.546875,
+ "learning_rate": 8.064592996895446e-05,
+ "loss": 0.4534,
+ "step": 72350
+ },
+ {
+ "epoch": 0.37354053482336796,
+ "grad_norm": 20774.87109375,
+ "learning_rate": 8.061705764071999e-05,
+ "loss": 0.4462,
+ "step": 72400
+ },
+ {
+ "epoch": 0.37379850480598076,
+ "grad_norm": 21871.390625,
+ "learning_rate": 8.0588168969693e-05,
+ "loss": 0.4445,
+ "step": 72450
+ },
+ {
+ "epoch": 0.3740564747885936,
+ "grad_norm": 22102.560546875,
+ "learning_rate": 8.05592639712937e-05,
+ "loss": 0.4478,
+ "step": 72500
+ },
+ {
+ "epoch": 0.3743144447712064,
+ "grad_norm": 21172.283203125,
+ "learning_rate": 8.053034266095105e-05,
+ "loss": 0.4469,
+ "step": 72550
+ },
+ {
+ "epoch": 0.37457241475381925,
+ "grad_norm": 21827.390625,
+ "learning_rate": 8.050140505410268e-05,
+ "loss": 0.4485,
+ "step": 72600
+ },
+ {
+ "epoch": 0.37483038473643204,
+ "grad_norm": 21271.87890625,
+ "learning_rate": 8.047245116619492e-05,
+ "loss": 0.45,
+ "step": 72650
+ },
+ {
+ "epoch": 0.3750883547190449,
+ "grad_norm": 21192.6484375,
+ "learning_rate": 8.04434810126828e-05,
+ "loss": 0.442,
+ "step": 72700
+ },
+ {
+ "epoch": 0.37534632470165774,
+ "grad_norm": 21529.736328125,
+ "learning_rate": 8.041449460903001e-05,
+ "loss": 0.4462,
+ "step": 72750
+ },
+ {
+ "epoch": 0.37560429468427053,
+ "grad_norm": 18609.474609375,
+ "learning_rate": 8.038549197070893e-05,
+ "loss": 0.4436,
+ "step": 72800
+ },
+ {
+ "epoch": 0.3758622646668834,
+ "grad_norm": 21631.82421875,
+ "learning_rate": 8.035647311320062e-05,
+ "loss": 0.4507,
+ "step": 72850
+ },
+ {
+ "epoch": 0.3761202346494962,
+ "grad_norm": 22347.056640625,
+ "learning_rate": 8.03274380519948e-05,
+ "loss": 0.4472,
+ "step": 72900
+ },
+ {
+ "epoch": 0.376378204632109,
+ "grad_norm": 20416.37109375,
+ "learning_rate": 8.029838680258979e-05,
+ "loss": 0.4475,
+ "step": 72950
+ },
+ {
+ "epoch": 0.3766361746147218,
+ "grad_norm": 21952.27734375,
+ "learning_rate": 8.026931938049259e-05,
+ "loss": 0.4449,
+ "step": 73000
+ },
+ {
+ "epoch": 0.37689414459733467,
+ "grad_norm": 23068.12109375,
+ "learning_rate": 8.024023580121885e-05,
+ "loss": 0.4477,
+ "step": 73050
+ },
+ {
+ "epoch": 0.37715211457994746,
+ "grad_norm": 21956.462890625,
+ "learning_rate": 8.021113608029281e-05,
+ "loss": 0.4459,
+ "step": 73100
+ },
+ {
+ "epoch": 0.3774100845625603,
+ "grad_norm": 20933.28125,
+ "learning_rate": 8.018202023324733e-05,
+ "loss": 0.4481,
+ "step": 73150
+ },
+ {
+ "epoch": 0.3776680545451731,
+ "grad_norm": 23138.638671875,
+ "learning_rate": 8.015288827562389e-05,
+ "loss": 0.437,
+ "step": 73200
+ },
+ {
+ "epoch": 0.37792602452778595,
+ "grad_norm": 20973.119140625,
+ "learning_rate": 8.012374022297255e-05,
+ "loss": 0.4454,
+ "step": 73250
+ },
+ {
+ "epoch": 0.37818399451039875,
+ "grad_norm": 21328.29296875,
+ "learning_rate": 8.0094576090852e-05,
+ "loss": 0.4426,
+ "step": 73300
+ },
+ {
+ "epoch": 0.3784419644930116,
+ "grad_norm": 20653.591796875,
+ "learning_rate": 8.006539589482949e-05,
+ "loss": 0.4448,
+ "step": 73350
+ },
+ {
+ "epoch": 0.3786999344756244,
+ "grad_norm": 21520.181640625,
+ "learning_rate": 8.003619965048083e-05,
+ "loss": 0.4428,
+ "step": 73400
+ },
+ {
+ "epoch": 0.37895790445823724,
+ "grad_norm": 20736.89453125,
+ "learning_rate": 8.000698737339041e-05,
+ "loss": 0.4483,
+ "step": 73450
+ },
+ {
+ "epoch": 0.3792158744408501,
+ "grad_norm": 23887.587890625,
+ "learning_rate": 7.997775907915118e-05,
+ "loss": 0.4518,
+ "step": 73500
+ },
+ {
+ "epoch": 0.3794738444234629,
+ "grad_norm": 23771.8671875,
+ "learning_rate": 7.994851478336465e-05,
+ "loss": 0.4479,
+ "step": 73550
+ },
+ {
+ "epoch": 0.37973181440607573,
+ "grad_norm": 21563.27734375,
+ "learning_rate": 7.991925450164084e-05,
+ "loss": 0.4433,
+ "step": 73600
+ },
+ {
+ "epoch": 0.3799897843886885,
+ "grad_norm": 21403.751953125,
+ "learning_rate": 7.988997824959832e-05,
+ "loss": 0.4443,
+ "step": 73650
+ },
+ {
+ "epoch": 0.38024775437130137,
+ "grad_norm": 22136.51171875,
+ "learning_rate": 7.986068604286421e-05,
+ "loss": 0.446,
+ "step": 73700
+ },
+ {
+ "epoch": 0.38050572435391417,
+ "grad_norm": 22143.857421875,
+ "learning_rate": 7.98313778970741e-05,
+ "loss": 0.4416,
+ "step": 73750
+ },
+ {
+ "epoch": 0.380763694336527,
+ "grad_norm": 22035.1171875,
+ "learning_rate": 7.980205382787211e-05,
+ "loss": 0.4413,
+ "step": 73800
+ },
+ {
+ "epoch": 0.3810216643191398,
+ "grad_norm": 21744.25390625,
+ "learning_rate": 7.97727138509109e-05,
+ "loss": 0.4463,
+ "step": 73850
+ },
+ {
+ "epoch": 0.38127963430175266,
+ "grad_norm": 21739.26171875,
+ "learning_rate": 7.974335798185153e-05,
+ "loss": 0.4415,
+ "step": 73900
+ },
+ {
+ "epoch": 0.38153760428436545,
+ "grad_norm": 20974.59765625,
+ "learning_rate": 7.971398623636361e-05,
+ "loss": 0.4457,
+ "step": 73950
+ },
+ {
+ "epoch": 0.3817955742669783,
+ "grad_norm": 19807.79296875,
+ "learning_rate": 7.968459863012523e-05,
+ "loss": 0.4423,
+ "step": 74000
+ },
+ {
+ "epoch": 0.3820535442495911,
+ "grad_norm": 21711.158203125,
+ "learning_rate": 7.96551951788229e-05,
+ "loss": 0.4466,
+ "step": 74050
+ },
+ {
+ "epoch": 0.38231151423220394,
+ "grad_norm": 19187.47265625,
+ "learning_rate": 7.962577589815163e-05,
+ "loss": 0.4387,
+ "step": 74100
+ },
+ {
+ "epoch": 0.3825694842148168,
+ "grad_norm": 19402.611328125,
+ "learning_rate": 7.959634080381486e-05,
+ "loss": 0.444,
+ "step": 74150
+ },
+ {
+ "epoch": 0.3828274541974296,
+ "grad_norm": 21287.9765625,
+ "learning_rate": 7.956688991152445e-05,
+ "loss": 0.4386,
+ "step": 74200
+ },
+ {
+ "epoch": 0.38308542418004243,
+ "grad_norm": 20430.591796875,
+ "learning_rate": 7.953742323700075e-05,
+ "loss": 0.4453,
+ "step": 74250
+ },
+ {
+ "epoch": 0.3833433941626552,
+ "grad_norm": 23246.041015625,
+ "learning_rate": 7.950794079597248e-05,
+ "loss": 0.4448,
+ "step": 74300
+ },
+ {
+ "epoch": 0.3836013641452681,
+ "grad_norm": 23098.74609375,
+ "learning_rate": 7.94784426041768e-05,
+ "loss": 0.4449,
+ "step": 74350
+ },
+ {
+ "epoch": 0.38385933412788087,
+ "grad_norm": 21504.71484375,
+ "learning_rate": 7.944892867735929e-05,
+ "loss": 0.4423,
+ "step": 74400
+ },
+ {
+ "epoch": 0.3841173041104937,
+ "grad_norm": 20115.0859375,
+ "learning_rate": 7.941939903127386e-05,
+ "loss": 0.4462,
+ "step": 74450
+ },
+ {
+ "epoch": 0.3843752740931065,
+ "grad_norm": 20473.681640625,
+ "learning_rate": 7.938985368168293e-05,
+ "loss": 0.4541,
+ "step": 74500
+ },
+ {
+ "epoch": 0.38463324407571936,
+ "grad_norm": 19664.6640625,
+ "learning_rate": 7.93602926443572e-05,
+ "loss": 0.4439,
+ "step": 74550
+ },
+ {
+ "epoch": 0.38489121405833215,
+ "grad_norm": 20806.474609375,
+ "learning_rate": 7.933071593507579e-05,
+ "loss": 0.439,
+ "step": 74600
+ },
+ {
+ "epoch": 0.385149184040945,
+ "grad_norm": 20905.197265625,
+ "learning_rate": 7.930112356962618e-05,
+ "loss": 0.444,
+ "step": 74650
+ },
+ {
+ "epoch": 0.3854071540235578,
+ "grad_norm": 26333.470703125,
+ "learning_rate": 7.927151556380417e-05,
+ "loss": 0.4462,
+ "step": 74700
+ },
+ {
+ "epoch": 0.38566512400617065,
+ "grad_norm": 20478.18359375,
+ "learning_rate": 7.924189193341396e-05,
+ "loss": 0.4456,
+ "step": 74750
+ },
+ {
+ "epoch": 0.38592309398878344,
+ "grad_norm": 20605.662109375,
+ "learning_rate": 7.921225269426808e-05,
+ "loss": 0.4412,
+ "step": 74800
+ },
+ {
+ "epoch": 0.3861810639713963,
+ "grad_norm": 23029.943359375,
+ "learning_rate": 7.918259786218738e-05,
+ "loss": 0.4427,
+ "step": 74850
+ },
+ {
+ "epoch": 0.38643903395400914,
+ "grad_norm": 23275.130859375,
+ "learning_rate": 7.915292745300103e-05,
+ "loss": 0.4436,
+ "step": 74900
+ },
+ {
+ "epoch": 0.38669700393662193,
+ "grad_norm": 22123.671875,
+ "learning_rate": 7.91232414825465e-05,
+ "loss": 0.4456,
+ "step": 74950
+ },
+ {
+ "epoch": 0.3869549739192348,
+ "grad_norm": 22476.365234375,
+ "learning_rate": 7.909353996666961e-05,
+ "loss": 0.4424,
+ "step": 75000
+ },
+ {
+ "epoch": 0.3869549739192348,
+ "eval_loss": 0.43277591466903687,
+ "eval_runtime": 3260.4686,
+ "eval_samples_per_second": 951.127,
+ "eval_steps_per_second": 1.858,
+ "step": 75000
+ },
+ {
+ "epoch": 0.3872129439018476,
+ "grad_norm": 22150.966796875,
+ "learning_rate": 7.906382292122448e-05,
+ "loss": 0.4407,
+ "step": 75050
+ },
+ {
+ "epoch": 0.3874709138844604,
+ "grad_norm": 20100.5625,
+ "learning_rate": 7.903409036207343e-05,
+ "loss": 0.4443,
+ "step": 75100
+ },
+ {
+ "epoch": 0.3877288838670732,
+ "grad_norm": 22078.353515625,
+ "learning_rate": 7.900434230508715e-05,
+ "loss": 0.4468,
+ "step": 75150
+ },
+ {
+ "epoch": 0.38798685384968606,
+ "grad_norm": 20395.498046875,
+ "learning_rate": 7.897457876614461e-05,
+ "loss": 0.4424,
+ "step": 75200
+ },
+ {
+ "epoch": 0.38824482383229886,
+ "grad_norm": 23190.4140625,
+ "learning_rate": 7.894479976113298e-05,
+ "loss": 0.4394,
+ "step": 75250
+ },
+ {
+ "epoch": 0.3885027938149117,
+ "grad_norm": 21523.7265625,
+ "learning_rate": 7.891500530594771e-05,
+ "loss": 0.4441,
+ "step": 75300
+ },
+ {
+ "epoch": 0.3887607637975245,
+ "grad_norm": 22941.23828125,
+ "learning_rate": 7.888519541649253e-05,
+ "loss": 0.443,
+ "step": 75350
+ },
+ {
+ "epoch": 0.38901873378013735,
+ "grad_norm": 21467.90234375,
+ "learning_rate": 7.885537010867936e-05,
+ "loss": 0.4478,
+ "step": 75400
+ },
+ {
+ "epoch": 0.38927670376275014,
+ "grad_norm": 22635.732421875,
+ "learning_rate": 7.882552939842837e-05,
+ "loss": 0.4415,
+ "step": 75450
+ },
+ {
+ "epoch": 0.389534673745363,
+ "grad_norm": 21242.326171875,
+ "learning_rate": 7.879567330166797e-05,
+ "loss": 0.4352,
+ "step": 75500
+ },
+ {
+ "epoch": 0.38979264372797584,
+ "grad_norm": 20005.158203125,
+ "learning_rate": 7.876580183433475e-05,
+ "loss": 0.4393,
+ "step": 75550
+ },
+ {
+ "epoch": 0.39005061371058863,
+ "grad_norm": 23355.044921875,
+ "learning_rate": 7.873591501237351e-05,
+ "loss": 0.4465,
+ "step": 75600
+ },
+ {
+ "epoch": 0.3903085836932015,
+ "grad_norm": 21217.359375,
+ "learning_rate": 7.870601285173731e-05,
+ "loss": 0.4437,
+ "step": 75650
+ },
+ {
+ "epoch": 0.3905665536758143,
+ "grad_norm": 22424.580078125,
+ "learning_rate": 7.867609536838729e-05,
+ "loss": 0.4397,
+ "step": 75700
+ },
+ {
+ "epoch": 0.3908245236584271,
+ "grad_norm": 20943.65234375,
+ "learning_rate": 7.864616257829285e-05,
+ "loss": 0.4427,
+ "step": 75750
+ },
+ {
+ "epoch": 0.3910824936410399,
+ "grad_norm": 23246.5625,
+ "learning_rate": 7.861621449743152e-05,
+ "loss": 0.4479,
+ "step": 75800
+ },
+ {
+ "epoch": 0.39134046362365277,
+ "grad_norm": 21575.830078125,
+ "learning_rate": 7.858625114178902e-05,
+ "loss": 0.4384,
+ "step": 75850
+ },
+ {
+ "epoch": 0.39159843360626556,
+ "grad_norm": 22053.5546875,
+ "learning_rate": 7.855627252735918e-05,
+ "loss": 0.4364,
+ "step": 75900
+ },
+ {
+ "epoch": 0.3918564035888784,
+ "grad_norm": 21934.55078125,
+ "learning_rate": 7.852627867014406e-05,
+ "loss": 0.4466,
+ "step": 75950
+ },
+ {
+ "epoch": 0.3921143735714912,
+ "grad_norm": 20184.078125,
+ "learning_rate": 7.849626958615374e-05,
+ "loss": 0.4422,
+ "step": 76000
+ },
+ {
+ "epoch": 0.39237234355410405,
+ "grad_norm": 21770.923828125,
+ "learning_rate": 7.846624529140652e-05,
+ "loss": 0.4382,
+ "step": 76050
+ },
+ {
+ "epoch": 0.39263031353671685,
+ "grad_norm": 21592.16796875,
+ "learning_rate": 7.843620580192877e-05,
+ "loss": 0.4404,
+ "step": 76100
+ },
+ {
+ "epoch": 0.3928882835193297,
+ "grad_norm": 19634.1875,
+ "learning_rate": 7.8406151133755e-05,
+ "loss": 0.4443,
+ "step": 76150
+ },
+ {
+ "epoch": 0.3931462535019425,
+ "grad_norm": 24045.01171875,
+ "learning_rate": 7.837608130292782e-05,
+ "loss": 0.438,
+ "step": 76200
+ },
+ {
+ "epoch": 0.39340422348455534,
+ "grad_norm": 21739.921875,
+ "learning_rate": 7.83459963254979e-05,
+ "loss": 0.4474,
+ "step": 76250
+ },
+ {
+ "epoch": 0.3936621934671682,
+ "grad_norm": 20915.56640625,
+ "learning_rate": 7.831589621752405e-05,
+ "loss": 0.4463,
+ "step": 76300
+ },
+ {
+ "epoch": 0.393920163449781,
+ "grad_norm": 18799.80078125,
+ "learning_rate": 7.828578099507308e-05,
+ "loss": 0.4401,
+ "step": 76350
+ },
+ {
+ "epoch": 0.39417813343239383,
+ "grad_norm": 19029.51171875,
+ "learning_rate": 7.825565067421995e-05,
+ "loss": 0.4428,
+ "step": 76400
+ },
+ {
+ "epoch": 0.3944361034150066,
+ "grad_norm": 22817.376953125,
+ "learning_rate": 7.822550527104762e-05,
+ "loss": 0.4467,
+ "step": 76450
+ },
+ {
+ "epoch": 0.39469407339761947,
+ "grad_norm": 19165.529296875,
+ "learning_rate": 7.819534480164713e-05,
+ "loss": 0.4365,
+ "step": 76500
+ },
+ {
+ "epoch": 0.39495204338023226,
+ "grad_norm": 22980.056640625,
+ "learning_rate": 7.816516928211756e-05,
+ "loss": 0.4386,
+ "step": 76550
+ },
+ {
+ "epoch": 0.3952100133628451,
+ "grad_norm": 21261.7109375,
+ "learning_rate": 7.813497872856603e-05,
+ "loss": 0.4358,
+ "step": 76600
+ },
+ {
+ "epoch": 0.3954679833454579,
+ "grad_norm": 21533.779296875,
+ "learning_rate": 7.810477315710763e-05,
+ "loss": 0.4444,
+ "step": 76650
+ },
+ {
+ "epoch": 0.39572595332807076,
+ "grad_norm": 20503.556640625,
+ "learning_rate": 7.807455258386556e-05,
+ "loss": 0.4446,
+ "step": 76700
+ },
+ {
+ "epoch": 0.39598392331068355,
+ "grad_norm": 21180.939453125,
+ "learning_rate": 7.804431702497093e-05,
+ "loss": 0.4486,
+ "step": 76750
+ },
+ {
+ "epoch": 0.3962418932932964,
+ "grad_norm": 24126.484375,
+ "learning_rate": 7.801406649656294e-05,
+ "loss": 0.4419,
+ "step": 76800
+ },
+ {
+ "epoch": 0.3964998632759092,
+ "grad_norm": 19791.345703125,
+ "learning_rate": 7.79838010147887e-05,
+ "loss": 0.4499,
+ "step": 76850
+ },
+ {
+ "epoch": 0.39675783325852204,
+ "grad_norm": 21118.822265625,
+ "learning_rate": 7.795352059580334e-05,
+ "loss": 0.4403,
+ "step": 76900
+ },
+ {
+ "epoch": 0.39701580324113483,
+ "grad_norm": 20787.6015625,
+ "learning_rate": 7.792322525577e-05,
+ "loss": 0.4394,
+ "step": 76950
+ },
+ {
+ "epoch": 0.3972737732237477,
+ "grad_norm": 21575.86328125,
+ "learning_rate": 7.789291501085972e-05,
+ "loss": 0.4482,
+ "step": 77000
+ },
+ {
+ "epoch": 0.39753174320636053,
+ "grad_norm": 21271.287109375,
+ "learning_rate": 7.78625898772515e-05,
+ "loss": 0.4413,
+ "step": 77050
+ },
+ {
+ "epoch": 0.3977897131889733,
+ "grad_norm": 21294.7890625,
+ "learning_rate": 7.783224987113235e-05,
+ "loss": 0.4393,
+ "step": 77100
+ },
+ {
+ "epoch": 0.3980476831715862,
+ "grad_norm": 21880.341796875,
+ "learning_rate": 7.780189500869716e-05,
+ "loss": 0.4464,
+ "step": 77150
+ },
+ {
+ "epoch": 0.39830565315419897,
+ "grad_norm": 22501.482421875,
+ "learning_rate": 7.777152530614876e-05,
+ "loss": 0.4384,
+ "step": 77200
+ },
+ {
+ "epoch": 0.3985636231368118,
+ "grad_norm": 20404.89453125,
+ "learning_rate": 7.774114077969792e-05,
+ "loss": 0.4355,
+ "step": 77250
+ },
+ {
+ "epoch": 0.3988215931194246,
+ "grad_norm": 21435.66015625,
+ "learning_rate": 7.77107414455633e-05,
+ "loss": 0.4468,
+ "step": 77300
+ },
+ {
+ "epoch": 0.39907956310203746,
+ "grad_norm": 20239.091796875,
+ "learning_rate": 7.768032731997148e-05,
+ "loss": 0.4453,
+ "step": 77350
+ },
+ {
+ "epoch": 0.39933753308465025,
+ "grad_norm": 19040.37109375,
+ "learning_rate": 7.764989841915694e-05,
+ "loss": 0.4487,
+ "step": 77400
+ },
+ {
+ "epoch": 0.3995955030672631,
+ "grad_norm": 22501.13671875,
+ "learning_rate": 7.761945475936203e-05,
+ "loss": 0.4488,
+ "step": 77450
+ },
+ {
+ "epoch": 0.3998534730498759,
+ "grad_norm": 20773.27734375,
+ "learning_rate": 7.7588996356837e-05,
+ "loss": 0.4384,
+ "step": 77500
+ },
+ {
+ "epoch": 0.40011144303248874,
+ "grad_norm": 22598.4140625,
+ "learning_rate": 7.755852322783994e-05,
+ "loss": 0.4358,
+ "step": 77550
+ },
+ {
+ "epoch": 0.40036941301510154,
+ "grad_norm": 20656.033203125,
+ "learning_rate": 7.752803538863683e-05,
+ "loss": 0.4434,
+ "step": 77600
+ },
+ {
+ "epoch": 0.4006273829977144,
+ "grad_norm": 20882.3125,
+ "learning_rate": 7.749753285550146e-05,
+ "loss": 0.4408,
+ "step": 77650
+ },
+ {
+ "epoch": 0.40088535298032724,
+ "grad_norm": 19519.408203125,
+ "learning_rate": 7.746701564471553e-05,
+ "loss": 0.439,
+ "step": 77700
+ },
+ {
+ "epoch": 0.40114332296294003,
+ "grad_norm": 21141.80859375,
+ "learning_rate": 7.74364837725685e-05,
+ "loss": 0.4422,
+ "step": 77750
+ },
+ {
+ "epoch": 0.4014012929455529,
+ "grad_norm": 21487.45703125,
+ "learning_rate": 7.74059372553577e-05,
+ "loss": 0.429,
+ "step": 77800
+ },
+ {
+ "epoch": 0.4016592629281657,
+ "grad_norm": 19889.447265625,
+ "learning_rate": 7.737537610938829e-05,
+ "loss": 0.4474,
+ "step": 77850
+ },
+ {
+ "epoch": 0.4019172329107785,
+ "grad_norm": 21914.947265625,
+ "learning_rate": 7.73448003509732e-05,
+ "loss": 0.4403,
+ "step": 77900
+ },
+ {
+ "epoch": 0.4021752028933913,
+ "grad_norm": 24025.521484375,
+ "learning_rate": 7.731420999643319e-05,
+ "loss": 0.4432,
+ "step": 77950
+ },
+ {
+ "epoch": 0.40243317287600416,
+ "grad_norm": 19703.50390625,
+ "learning_rate": 7.728360506209679e-05,
+ "loss": 0.443,
+ "step": 78000
+ },
+ {
+ "epoch": 0.40269114285861696,
+ "grad_norm": 21566.37890625,
+ "learning_rate": 7.725298556430034e-05,
+ "loss": 0.448,
+ "step": 78050
+ },
+ {
+ "epoch": 0.4029491128412298,
+ "grad_norm": 21902.564453125,
+ "learning_rate": 7.72223515193879e-05,
+ "loss": 0.438,
+ "step": 78100
+ },
+ {
+ "epoch": 0.4032070828238426,
+ "grad_norm": 20892.7578125,
+ "learning_rate": 7.719170294371136e-05,
+ "loss": 0.4382,
+ "step": 78150
+ },
+ {
+ "epoch": 0.40346505280645545,
+ "grad_norm": 21648.673828125,
+ "learning_rate": 7.716103985363033e-05,
+ "loss": 0.4378,
+ "step": 78200
+ },
+ {
+ "epoch": 0.40372302278906824,
+ "grad_norm": 23124.40625,
+ "learning_rate": 7.713036226551215e-05,
+ "loss": 0.442,
+ "step": 78250
+ },
+ {
+ "epoch": 0.4039809927716811,
+ "grad_norm": 25006.751953125,
+ "learning_rate": 7.709967019573195e-05,
+ "loss": 0.4397,
+ "step": 78300
+ },
+ {
+ "epoch": 0.4042389627542939,
+ "grad_norm": 20722.802734375,
+ "learning_rate": 7.706896366067256e-05,
+ "loss": 0.4388,
+ "step": 78350
+ },
+ {
+ "epoch": 0.40449693273690673,
+ "grad_norm": 20202.013671875,
+ "learning_rate": 7.703824267672452e-05,
+ "loss": 0.4404,
+ "step": 78400
+ },
+ {
+ "epoch": 0.4047549027195196,
+ "grad_norm": 21261.9375,
+ "learning_rate": 7.700750726028609e-05,
+ "loss": 0.4369,
+ "step": 78450
+ },
+ {
+ "epoch": 0.4050128727021324,
+ "grad_norm": 25343.57421875,
+ "learning_rate": 7.69767574277633e-05,
+ "loss": 0.4444,
+ "step": 78500
+ },
+ {
+ "epoch": 0.4052708426847452,
+ "grad_norm": 20222.767578125,
+ "learning_rate": 7.694599319556972e-05,
+ "loss": 0.4425,
+ "step": 78550
+ },
+ {
+ "epoch": 0.405528812667358,
+ "grad_norm": 22934.466796875,
+ "learning_rate": 7.691521458012678e-05,
+ "loss": 0.4411,
+ "step": 78600
+ },
+ {
+ "epoch": 0.40578678264997087,
+ "grad_norm": 22235.30078125,
+ "learning_rate": 7.688442159786346e-05,
+ "loss": 0.4445,
+ "step": 78650
+ },
+ {
+ "epoch": 0.40604475263258366,
+ "grad_norm": 21313.986328125,
+ "learning_rate": 7.68536142652165e-05,
+ "loss": 0.4341,
+ "step": 78700
+ },
+ {
+ "epoch": 0.4063027226151965,
+ "grad_norm": 20130.53515625,
+ "learning_rate": 7.68227925986302e-05,
+ "loss": 0.4395,
+ "step": 78750
+ },
+ {
+ "epoch": 0.4065606925978093,
+ "grad_norm": 19342.740234375,
+ "learning_rate": 7.679195661455664e-05,
+ "loss": 0.4424,
+ "step": 78800
+ },
+ {
+ "epoch": 0.40681866258042215,
+ "grad_norm": 21876.705078125,
+ "learning_rate": 7.676110632945543e-05,
+ "loss": 0.4415,
+ "step": 78850
+ },
+ {
+ "epoch": 0.40707663256303495,
+ "grad_norm": 23199.501953125,
+ "learning_rate": 7.673024175979384e-05,
+ "loss": 0.4423,
+ "step": 78900
+ },
+ {
+ "epoch": 0.4073346025456478,
+ "grad_norm": 22781.091796875,
+ "learning_rate": 7.669936292204683e-05,
+ "loss": 0.4398,
+ "step": 78950
+ },
+ {
+ "epoch": 0.4075925725282606,
+ "grad_norm": 24025.9375,
+ "learning_rate": 7.666846983269688e-05,
+ "loss": 0.4326,
+ "step": 79000
+ },
+ {
+ "epoch": 0.40785054251087344,
+ "grad_norm": 20797.056640625,
+ "learning_rate": 7.663756250823413e-05,
+ "loss": 0.4388,
+ "step": 79050
+ },
+ {
+ "epoch": 0.40810851249348623,
+ "grad_norm": 25106.67578125,
+ "learning_rate": 7.660664096515632e-05,
+ "loss": 0.4385,
+ "step": 79100
+ },
+ {
+ "epoch": 0.4083664824760991,
+ "grad_norm": 22217.36328125,
+ "learning_rate": 7.657570521996877e-05,
+ "loss": 0.4455,
+ "step": 79150
+ },
+ {
+ "epoch": 0.40862445245871193,
+ "grad_norm": 21679.291015625,
+ "learning_rate": 7.654475528918439e-05,
+ "loss": 0.4409,
+ "step": 79200
+ },
+ {
+ "epoch": 0.4088824224413247,
+ "grad_norm": 20133.583984375,
+ "learning_rate": 7.651379118932364e-05,
+ "loss": 0.4391,
+ "step": 79250
+ },
+ {
+ "epoch": 0.40914039242393757,
+ "grad_norm": 23019.171875,
+ "learning_rate": 7.648281293691457e-05,
+ "loss": 0.446,
+ "step": 79300
+ },
+ {
+ "epoch": 0.40939836240655036,
+ "grad_norm": 24098.38671875,
+ "learning_rate": 7.645182054849276e-05,
+ "loss": 0.4417,
+ "step": 79350
+ },
+ {
+ "epoch": 0.4096563323891632,
+ "grad_norm": 23057.240234375,
+ "learning_rate": 7.642081404060136e-05,
+ "loss": 0.4424,
+ "step": 79400
+ },
+ {
+ "epoch": 0.409914302371776,
+ "grad_norm": 20033.328125,
+ "learning_rate": 7.638979342979103e-05,
+ "loss": 0.4386,
+ "step": 79450
+ },
+ {
+ "epoch": 0.41017227235438886,
+ "grad_norm": 20978.68359375,
+ "learning_rate": 7.635875873261995e-05,
+ "loss": 0.4363,
+ "step": 79500
+ },
+ {
+ "epoch": 0.41043024233700165,
+ "grad_norm": 21347.068359375,
+ "learning_rate": 7.63277099656539e-05,
+ "loss": 0.4431,
+ "step": 79550
+ },
+ {
+ "epoch": 0.4106882123196145,
+ "grad_norm": 22031.8125,
+ "learning_rate": 7.629664714546604e-05,
+ "loss": 0.4313,
+ "step": 79600
+ },
+ {
+ "epoch": 0.4109461823022273,
+ "grad_norm": 23963.99609375,
+ "learning_rate": 7.626557028863717e-05,
+ "loss": 0.4363,
+ "step": 79650
+ },
+ {
+ "epoch": 0.41120415228484014,
+ "grad_norm": 20183.259765625,
+ "learning_rate": 7.623447941175548e-05,
+ "loss": 0.4419,
+ "step": 79700
+ },
+ {
+ "epoch": 0.41146212226745293,
+ "grad_norm": 23588.68359375,
+ "learning_rate": 7.620337453141667e-05,
+ "loss": 0.4388,
+ "step": 79750
+ },
+ {
+ "epoch": 0.4117200922500658,
+ "grad_norm": 22210.7265625,
+ "learning_rate": 7.617225566422395e-05,
+ "loss": 0.442,
+ "step": 79800
+ },
+ {
+ "epoch": 0.41197806223267863,
+ "grad_norm": 18647.93359375,
+ "learning_rate": 7.614112282678794e-05,
+ "loss": 0.4349,
+ "step": 79850
+ },
+ {
+ "epoch": 0.4122360322152914,
+ "grad_norm": 20993.388671875,
+ "learning_rate": 7.610997603572675e-05,
+ "loss": 0.4386,
+ "step": 79900
+ },
+ {
+ "epoch": 0.4124940021979043,
+ "grad_norm": 23693.26171875,
+ "learning_rate": 7.607881530766596e-05,
+ "loss": 0.4385,
+ "step": 79950
+ },
+ {
+ "epoch": 0.41275197218051707,
+ "grad_norm": 22608.26953125,
+ "learning_rate": 7.604764065923852e-05,
+ "loss": 0.4415,
+ "step": 80000
+ },
+ {
+ "epoch": 0.41275197218051707,
+ "eval_loss": 0.4290848970413208,
+ "eval_runtime": 3332.9887,
+ "eval_samples_per_second": 930.432,
+ "eval_steps_per_second": 1.817,
+ "step": 80000
+ },
+ {
+ "epoch": 0.4130099421631299,
+ "grad_norm": 23348.44921875,
+ "learning_rate": 7.60164521070849e-05,
+ "loss": 0.4392,
+ "step": 80050
+ },
+ {
+ "epoch": 0.4132679121457427,
+ "grad_norm": 19942.9921875,
+ "learning_rate": 7.598524966785293e-05,
+ "loss": 0.4362,
+ "step": 80100
+ },
+ {
+ "epoch": 0.41352588212835556,
+ "grad_norm": 22776.587890625,
+ "learning_rate": 7.595403335819786e-05,
+ "loss": 0.4402,
+ "step": 80150
+ },
+ {
+ "epoch": 0.41378385211096835,
+ "grad_norm": 22519.923828125,
+ "learning_rate": 7.592280319478233e-05,
+ "loss": 0.4412,
+ "step": 80200
+ },
+ {
+ "epoch": 0.4140418220935812,
+ "grad_norm": 22480.52734375,
+ "learning_rate": 7.589155919427645e-05,
+ "loss": 0.4393,
+ "step": 80250
+ },
+ {
+ "epoch": 0.414299792076194,
+ "grad_norm": 20900.625,
+ "learning_rate": 7.586030137335762e-05,
+ "loss": 0.4344,
+ "step": 80300
+ },
+ {
+ "epoch": 0.41455776205880684,
+ "grad_norm": 21272.306640625,
+ "learning_rate": 7.582902974871069e-05,
+ "loss": 0.4385,
+ "step": 80350
+ },
+ {
+ "epoch": 0.41481573204141964,
+ "grad_norm": 21448.478515625,
+ "learning_rate": 7.57977443370278e-05,
+ "loss": 0.4395,
+ "step": 80400
+ },
+ {
+ "epoch": 0.4150737020240325,
+ "grad_norm": 21854.537109375,
+ "learning_rate": 7.576644515500855e-05,
+ "loss": 0.4411,
+ "step": 80450
+ },
+ {
+ "epoch": 0.4153316720066453,
+ "grad_norm": 21458.689453125,
+ "learning_rate": 7.573513221935979e-05,
+ "loss": 0.4429,
+ "step": 80500
+ },
+ {
+ "epoch": 0.41558964198925813,
+ "grad_norm": 21895.71875,
+ "learning_rate": 7.57038055467958e-05,
+ "loss": 0.4391,
+ "step": 80550
+ },
+ {
+ "epoch": 0.415847611971871,
+ "grad_norm": 23495.921875,
+ "learning_rate": 7.567246515403812e-05,
+ "loss": 0.4398,
+ "step": 80600
+ },
+ {
+ "epoch": 0.41610558195448377,
+ "grad_norm": 26117.8671875,
+ "learning_rate": 7.564111105781568e-05,
+ "loss": 0.4407,
+ "step": 80650
+ },
+ {
+ "epoch": 0.4163635519370966,
+ "grad_norm": 21881.818359375,
+ "learning_rate": 7.560974327486466e-05,
+ "loss": 0.4336,
+ "step": 80700
+ },
+ {
+ "epoch": 0.4166215219197094,
+ "grad_norm": 21309.1015625,
+ "learning_rate": 7.557836182192859e-05,
+ "loss": 0.4371,
+ "step": 80750
+ },
+ {
+ "epoch": 0.41687949190232226,
+ "grad_norm": 21723.498046875,
+ "learning_rate": 7.554696671575826e-05,
+ "loss": 0.4384,
+ "step": 80800
+ },
+ {
+ "epoch": 0.41713746188493506,
+ "grad_norm": 19767.9609375,
+ "learning_rate": 7.55155579731118e-05,
+ "loss": 0.4375,
+ "step": 80850
+ },
+ {
+ "epoch": 0.4173954318675479,
+ "grad_norm": 18992.958984375,
+ "learning_rate": 7.548413561075456e-05,
+ "loss": 0.4419,
+ "step": 80900
+ },
+ {
+ "epoch": 0.4176534018501607,
+ "grad_norm": 21593.255859375,
+ "learning_rate": 7.545269964545921e-05,
+ "loss": 0.4372,
+ "step": 80950
+ },
+ {
+ "epoch": 0.41791137183277355,
+ "grad_norm": 19369.3125,
+ "learning_rate": 7.542125009400565e-05,
+ "loss": 0.4402,
+ "step": 81000
+ },
+ {
+ "epoch": 0.41816934181538634,
+ "grad_norm": 20552.06640625,
+ "learning_rate": 7.538978697318105e-05,
+ "loss": 0.4418,
+ "step": 81050
+ },
+ {
+ "epoch": 0.4184273117979992,
+ "grad_norm": 21554.94140625,
+ "learning_rate": 7.53583102997798e-05,
+ "loss": 0.4406,
+ "step": 81100
+ },
+ {
+ "epoch": 0.418685281780612,
+ "grad_norm": 21098.296875,
+ "learning_rate": 7.532682009060356e-05,
+ "loss": 0.443,
+ "step": 81150
+ },
+ {
+ "epoch": 0.41894325176322483,
+ "grad_norm": 24148.71484375,
+ "learning_rate": 7.529531636246116e-05,
+ "loss": 0.4345,
+ "step": 81200
+ },
+ {
+ "epoch": 0.4192012217458376,
+ "grad_norm": 20404.298828125,
+ "learning_rate": 7.526379913216872e-05,
+ "loss": 0.4335,
+ "step": 81250
+ },
+ {
+ "epoch": 0.4194591917284505,
+ "grad_norm": 22061.607421875,
+ "learning_rate": 7.52322684165495e-05,
+ "loss": 0.4385,
+ "step": 81300
+ },
+ {
+ "epoch": 0.4197171617110633,
+ "grad_norm": 18455.380859375,
+ "learning_rate": 7.520072423243398e-05,
+ "loss": 0.4337,
+ "step": 81350
+ },
+ {
+ "epoch": 0.4199751316936761,
+ "grad_norm": 23344.2734375,
+ "learning_rate": 7.516916659665987e-05,
+ "loss": 0.4401,
+ "step": 81400
+ },
+ {
+ "epoch": 0.42023310167628897,
+ "grad_norm": 20872.77734375,
+ "learning_rate": 7.5137595526072e-05,
+ "loss": 0.4394,
+ "step": 81450
+ },
+ {
+ "epoch": 0.42049107165890176,
+ "grad_norm": 21003.841796875,
+ "learning_rate": 7.51060110375224e-05,
+ "loss": 0.4402,
+ "step": 81500
+ },
+ {
+ "epoch": 0.4207490416415146,
+ "grad_norm": 22772.330078125,
+ "learning_rate": 7.507441314787025e-05,
+ "loss": 0.4438,
+ "step": 81550
+ },
+ {
+ "epoch": 0.4210070116241274,
+ "grad_norm": 19593.216796875,
+ "learning_rate": 7.504280187398189e-05,
+ "loss": 0.4375,
+ "step": 81600
+ },
+ {
+ "epoch": 0.42126498160674025,
+ "grad_norm": 20914.66796875,
+ "learning_rate": 7.501117723273084e-05,
+ "loss": 0.4397,
+ "step": 81650
+ },
+ {
+ "epoch": 0.42152295158935305,
+ "grad_norm": 20479.12109375,
+ "learning_rate": 7.497953924099768e-05,
+ "loss": 0.4365,
+ "step": 81700
+ },
+ {
+ "epoch": 0.4217809215719659,
+ "grad_norm": 20309.25,
+ "learning_rate": 7.494788791567017e-05,
+ "loss": 0.4461,
+ "step": 81750
+ },
+ {
+ "epoch": 0.4220388915545787,
+ "grad_norm": 21467.72265625,
+ "learning_rate": 7.491622327364318e-05,
+ "loss": 0.4354,
+ "step": 81800
+ },
+ {
+ "epoch": 0.42229686153719154,
+ "grad_norm": 20826.80859375,
+ "learning_rate": 7.488454533181871e-05,
+ "loss": 0.4398,
+ "step": 81850
+ },
+ {
+ "epoch": 0.42255483151980433,
+ "grad_norm": 20537.826171875,
+ "learning_rate": 7.485285410710577e-05,
+ "loss": 0.4443,
+ "step": 81900
+ },
+ {
+ "epoch": 0.4228128015024172,
+ "grad_norm": 19521.810546875,
+ "learning_rate": 7.482114961642057e-05,
+ "loss": 0.4379,
+ "step": 81950
+ },
+ {
+ "epoch": 0.42307077148503003,
+ "grad_norm": 19407.5234375,
+ "learning_rate": 7.478943187668633e-05,
+ "loss": 0.4429,
+ "step": 82000
+ },
+ {
+ "epoch": 0.4233287414676428,
+ "grad_norm": 23058.337890625,
+ "learning_rate": 7.475770090483338e-05,
+ "loss": 0.4362,
+ "step": 82050
+ },
+ {
+ "epoch": 0.42358671145025567,
+ "grad_norm": 27362.29296875,
+ "learning_rate": 7.472595671779907e-05,
+ "loss": 0.4413,
+ "step": 82100
+ },
+ {
+ "epoch": 0.42384468143286846,
+ "grad_norm": 20389.08203125,
+ "learning_rate": 7.469419933252789e-05,
+ "loss": 0.4386,
+ "step": 82150
+ },
+ {
+ "epoch": 0.4241026514154813,
+ "grad_norm": 21554.896484375,
+ "learning_rate": 7.466242876597125e-05,
+ "loss": 0.4387,
+ "step": 82200
+ },
+ {
+ "epoch": 0.4243606213980941,
+ "grad_norm": 23449.822265625,
+ "learning_rate": 7.463064503508772e-05,
+ "loss": 0.4402,
+ "step": 82250
+ },
+ {
+ "epoch": 0.42461859138070696,
+ "grad_norm": 23945.1328125,
+ "learning_rate": 7.459884815684279e-05,
+ "loss": 0.4393,
+ "step": 82300
+ },
+ {
+ "epoch": 0.42487656136331975,
+ "grad_norm": 21705.064453125,
+ "learning_rate": 7.456703814820904e-05,
+ "loss": 0.4374,
+ "step": 82350
+ },
+ {
+ "epoch": 0.4251345313459326,
+ "grad_norm": 20050.66796875,
+ "learning_rate": 7.453521502616607e-05,
+ "loss": 0.4433,
+ "step": 82400
+ },
+ {
+ "epoch": 0.4253925013285454,
+ "grad_norm": 24757.845703125,
+ "learning_rate": 7.45033788077004e-05,
+ "loss": 0.4362,
+ "step": 82450
+ },
+ {
+ "epoch": 0.42565047131115824,
+ "grad_norm": 21754.42578125,
+ "learning_rate": 7.44715295098056e-05,
+ "loss": 0.4386,
+ "step": 82500
+ },
+ {
+ "epoch": 0.42590844129377103,
+ "grad_norm": 22891.12890625,
+ "learning_rate": 7.443966714948222e-05,
+ "loss": 0.4438,
+ "step": 82550
+ },
+ {
+ "epoch": 0.4261664112763839,
+ "grad_norm": 22174.580078125,
+ "learning_rate": 7.440779174373776e-05,
+ "loss": 0.4388,
+ "step": 82600
+ },
+ {
+ "epoch": 0.4264243812589967,
+ "grad_norm": 20407.677734375,
+ "learning_rate": 7.43759033095867e-05,
+ "loss": 0.4412,
+ "step": 82650
+ },
+ {
+ "epoch": 0.4266823512416095,
+ "grad_norm": 21960.552734375,
+ "learning_rate": 7.434400186405045e-05,
+ "loss": 0.4394,
+ "step": 82700
+ },
+ {
+ "epoch": 0.4269403212242224,
+ "grad_norm": 20736.583984375,
+ "learning_rate": 7.431208742415741e-05,
+ "loss": 0.4382,
+ "step": 82750
+ },
+ {
+ "epoch": 0.42719829120683517,
+ "grad_norm": 21133.63671875,
+ "learning_rate": 7.428016000694286e-05,
+ "loss": 0.4379,
+ "step": 82800
+ },
+ {
+ "epoch": 0.427456261189448,
+ "grad_norm": 23741.525390625,
+ "learning_rate": 7.424821962944908e-05,
+ "loss": 0.4398,
+ "step": 82850
+ },
+ {
+ "epoch": 0.4277142311720608,
+ "grad_norm": 21936.802734375,
+ "learning_rate": 7.42162663087252e-05,
+ "loss": 0.4383,
+ "step": 82900
+ },
+ {
+ "epoch": 0.42797220115467366,
+ "grad_norm": 24459.85546875,
+ "learning_rate": 7.418430006182727e-05,
+ "loss": 0.4393,
+ "step": 82950
+ },
+ {
+ "epoch": 0.42823017113728645,
+ "grad_norm": 21729.9921875,
+ "learning_rate": 7.415232090581828e-05,
+ "loss": 0.4421,
+ "step": 83000
+ },
+ {
+ "epoch": 0.4284881411198993,
+ "grad_norm": 21081.5703125,
+ "learning_rate": 7.412032885776807e-05,
+ "loss": 0.4414,
+ "step": 83050
+ },
+ {
+ "epoch": 0.4287461111025121,
+ "grad_norm": 20296.740234375,
+ "learning_rate": 7.408832393475338e-05,
+ "loss": 0.4316,
+ "step": 83100
+ },
+ {
+ "epoch": 0.42900408108512494,
+ "grad_norm": 20874.30078125,
+ "learning_rate": 7.405630615385781e-05,
+ "loss": 0.433,
+ "step": 83150
+ },
+ {
+ "epoch": 0.42926205106773774,
+ "grad_norm": 20673.11328125,
+ "learning_rate": 7.402427553217183e-05,
+ "loss": 0.4386,
+ "step": 83200
+ },
+ {
+ "epoch": 0.4295200210503506,
+ "grad_norm": 22462.07421875,
+ "learning_rate": 7.39922320867928e-05,
+ "loss": 0.4464,
+ "step": 83250
+ },
+ {
+ "epoch": 0.4297779910329634,
+ "grad_norm": 20411.771484375,
+ "learning_rate": 7.396017583482487e-05,
+ "loss": 0.444,
+ "step": 83300
+ },
+ {
+ "epoch": 0.43003596101557623,
+ "grad_norm": 21137.6953125,
+ "learning_rate": 7.392810679337902e-05,
+ "loss": 0.4416,
+ "step": 83350
+ },
+ {
+ "epoch": 0.4302939309981891,
+ "grad_norm": 23059.064453125,
+ "learning_rate": 7.38960249795731e-05,
+ "loss": 0.4401,
+ "step": 83400
+ },
+ {
+ "epoch": 0.43055190098080187,
+ "grad_norm": 20305.22265625,
+ "learning_rate": 7.386393041053176e-05,
+ "loss": 0.4399,
+ "step": 83450
+ },
+ {
+ "epoch": 0.4308098709634147,
+ "grad_norm": 22247.779296875,
+ "learning_rate": 7.38318231033865e-05,
+ "loss": 0.4362,
+ "step": 83500
+ },
+ {
+ "epoch": 0.4310678409460275,
+ "grad_norm": 22231.337890625,
+ "learning_rate": 7.379970307527552e-05,
+ "loss": 0.4417,
+ "step": 83550
+ },
+ {
+ "epoch": 0.43132581092864036,
+ "grad_norm": 21788.875,
+ "learning_rate": 7.376757034334388e-05,
+ "loss": 0.4374,
+ "step": 83600
+ },
+ {
+ "epoch": 0.43158378091125316,
+ "grad_norm": 22237.51953125,
+ "learning_rate": 7.373542492474343e-05,
+ "loss": 0.4372,
+ "step": 83650
+ },
+ {
+ "epoch": 0.431841750893866,
+ "grad_norm": 21732.943359375,
+ "learning_rate": 7.370326683663278e-05,
+ "loss": 0.4395,
+ "step": 83700
+ },
+ {
+ "epoch": 0.4320997208764788,
+ "grad_norm": 19517.212890625,
+ "learning_rate": 7.367109609617729e-05,
+ "loss": 0.4371,
+ "step": 83750
+ },
+ {
+ "epoch": 0.43235769085909165,
+ "grad_norm": 23681.388671875,
+ "learning_rate": 7.363891272054903e-05,
+ "loss": 0.4383,
+ "step": 83800
+ },
+ {
+ "epoch": 0.43261566084170444,
+ "grad_norm": 23889.822265625,
+ "learning_rate": 7.360671672692691e-05,
+ "loss": 0.441,
+ "step": 83850
+ },
+ {
+ "epoch": 0.4328736308243173,
+ "grad_norm": 21159.45703125,
+ "learning_rate": 7.357450813249654e-05,
+ "loss": 0.4328,
+ "step": 83900
+ },
+ {
+ "epoch": 0.4331316008069301,
+ "grad_norm": 20617.83984375,
+ "learning_rate": 7.354228695445023e-05,
+ "loss": 0.4395,
+ "step": 83950
+ },
+ {
+ "epoch": 0.43338957078954293,
+ "grad_norm": 19741.568359375,
+ "learning_rate": 7.351005320998699e-05,
+ "loss": 0.4356,
+ "step": 84000
+ },
+ {
+ "epoch": 0.4336475407721557,
+ "grad_norm": 21407.771484375,
+ "learning_rate": 7.347780691631259e-05,
+ "loss": 0.4322,
+ "step": 84050
+ },
+ {
+ "epoch": 0.4339055107547686,
+ "grad_norm": 22396.5625,
+ "learning_rate": 7.344554809063947e-05,
+ "loss": 0.4379,
+ "step": 84100
+ },
+ {
+ "epoch": 0.4341634807373814,
+ "grad_norm": 23536.361328125,
+ "learning_rate": 7.34132767501868e-05,
+ "loss": 0.4372,
+ "step": 84150
+ },
+ {
+ "epoch": 0.4344214507199942,
+ "grad_norm": 23622.90234375,
+ "learning_rate": 7.338099291218036e-05,
+ "loss": 0.4361,
+ "step": 84200
+ },
+ {
+ "epoch": 0.43467942070260707,
+ "grad_norm": 24463.931640625,
+ "learning_rate": 7.334869659385264e-05,
+ "loss": 0.4478,
+ "step": 84250
+ },
+ {
+ "epoch": 0.43493739068521986,
+ "grad_norm": 21666.328125,
+ "learning_rate": 7.331638781244283e-05,
+ "loss": 0.4387,
+ "step": 84300
+ },
+ {
+ "epoch": 0.4351953606678327,
+ "grad_norm": 21145.6875,
+ "learning_rate": 7.328406658519669e-05,
+ "loss": 0.4362,
+ "step": 84350
+ },
+ {
+ "epoch": 0.4354533306504455,
+ "grad_norm": 21766.228515625,
+ "learning_rate": 7.325173292936667e-05,
+ "loss": 0.4433,
+ "step": 84400
+ },
+ {
+ "epoch": 0.43571130063305835,
+ "grad_norm": 23118.056640625,
+ "learning_rate": 7.321938686221185e-05,
+ "loss": 0.4317,
+ "step": 84450
+ },
+ {
+ "epoch": 0.43596927061567115,
+ "grad_norm": 20925.833984375,
+ "learning_rate": 7.318702840099793e-05,
+ "loss": 0.4348,
+ "step": 84500
+ },
+ {
+ "epoch": 0.436227240598284,
+ "grad_norm": 21725.630859375,
+ "learning_rate": 7.315465756299727e-05,
+ "loss": 0.4363,
+ "step": 84550
+ },
+ {
+ "epoch": 0.4364852105808968,
+ "grad_norm": 20223.537109375,
+ "learning_rate": 7.312227436548875e-05,
+ "loss": 0.4363,
+ "step": 84600
+ },
+ {
+ "epoch": 0.43674318056350964,
+ "grad_norm": 22766.71484375,
+ "learning_rate": 7.308987882575793e-05,
+ "loss": 0.442,
+ "step": 84650
+ },
+ {
+ "epoch": 0.43700115054612243,
+ "grad_norm": 20453.341796875,
+ "learning_rate": 7.305747096109688e-05,
+ "loss": 0.4362,
+ "step": 84700
+ },
+ {
+ "epoch": 0.4372591205287353,
+ "grad_norm": 20761.466796875,
+ "learning_rate": 7.302505078880431e-05,
+ "loss": 0.435,
+ "step": 84750
+ },
+ {
+ "epoch": 0.4375170905113481,
+ "grad_norm": 20815.27734375,
+ "learning_rate": 7.299261832618551e-05,
+ "loss": 0.4398,
+ "step": 84800
+ },
+ {
+ "epoch": 0.4377750604939609,
+ "grad_norm": 22528.06640625,
+ "learning_rate": 7.296017359055224e-05,
+ "loss": 0.44,
+ "step": 84850
+ },
+ {
+ "epoch": 0.43803303047657377,
+ "grad_norm": 21391.71484375,
+ "learning_rate": 7.292771659922293e-05,
+ "loss": 0.4376,
+ "step": 84900
+ },
+ {
+ "epoch": 0.43829100045918656,
+ "grad_norm": 21485.966796875,
+ "learning_rate": 7.289524736952245e-05,
+ "loss": 0.4424,
+ "step": 84950
+ },
+ {
+ "epoch": 0.4385489704417994,
+ "grad_norm": 21160.314453125,
+ "learning_rate": 7.286276591878228e-05,
+ "loss": 0.4473,
+ "step": 85000
+ },
+ {
+ "epoch": 0.4385489704417994,
+ "eval_loss": 0.4252757728099823,
+ "eval_runtime": 3252.991,
+ "eval_samples_per_second": 953.313,
+ "eval_steps_per_second": 1.862,
+ "step": 85000
+ },
+ {
+ "epoch": 0.4388069404244122,
+ "grad_norm": 29667.109375,
+ "learning_rate": 7.283027226434036e-05,
+ "loss": 0.4414,
+ "step": 85050
+ },
+ {
+ "epoch": 0.43906491040702506,
+ "grad_norm": 24990.86328125,
+ "learning_rate": 7.27977664235412e-05,
+ "loss": 0.4321,
+ "step": 85100
+ },
+ {
+ "epoch": 0.43932288038963785,
+ "grad_norm": 21708.86328125,
+ "learning_rate": 7.276524841373576e-05,
+ "loss": 0.4331,
+ "step": 85150
+ },
+ {
+ "epoch": 0.4395808503722507,
+ "grad_norm": 22323.1015625,
+ "learning_rate": 7.273271825228157e-05,
+ "loss": 0.4372,
+ "step": 85200
+ },
+ {
+ "epoch": 0.4398388203548635,
+ "grad_norm": 21696.2734375,
+ "learning_rate": 7.270017595654255e-05,
+ "loss": 0.4271,
+ "step": 85250
+ },
+ {
+ "epoch": 0.44009679033747634,
+ "grad_norm": 23364.560546875,
+ "learning_rate": 7.266762154388917e-05,
+ "loss": 0.4327,
+ "step": 85300
+ },
+ {
+ "epoch": 0.44035476032008913,
+ "grad_norm": 21834.607421875,
+ "learning_rate": 7.263505503169834e-05,
+ "loss": 0.4337,
+ "step": 85350
+ },
+ {
+ "epoch": 0.440612730302702,
+ "grad_norm": 18636.244140625,
+ "learning_rate": 7.260247643735343e-05,
+ "loss": 0.4393,
+ "step": 85400
+ },
+ {
+ "epoch": 0.4408707002853148,
+ "grad_norm": 20385.875,
+ "learning_rate": 7.256988577824427e-05,
+ "loss": 0.4398,
+ "step": 85450
+ },
+ {
+ "epoch": 0.4411286702679276,
+ "grad_norm": 21459.576171875,
+ "learning_rate": 7.253728307176713e-05,
+ "loss": 0.435,
+ "step": 85500
+ },
+ {
+ "epoch": 0.4413866402505405,
+ "grad_norm": 22838.716796875,
+ "learning_rate": 7.25046683353247e-05,
+ "loss": 0.4368,
+ "step": 85550
+ },
+ {
+ "epoch": 0.44164461023315327,
+ "grad_norm": 23016.4140625,
+ "learning_rate": 7.247204158632608e-05,
+ "loss": 0.4353,
+ "step": 85600
+ },
+ {
+ "epoch": 0.4419025802157661,
+ "grad_norm": 22318.193359375,
+ "learning_rate": 7.243940284218682e-05,
+ "loss": 0.4374,
+ "step": 85650
+ },
+ {
+ "epoch": 0.4421605501983789,
+ "grad_norm": 20475.376953125,
+ "learning_rate": 7.240675212032884e-05,
+ "loss": 0.4339,
+ "step": 85700
+ },
+ {
+ "epoch": 0.44241852018099176,
+ "grad_norm": 22276.287109375,
+ "learning_rate": 7.237408943818042e-05,
+ "loss": 0.4275,
+ "step": 85750
+ },
+ {
+ "epoch": 0.44267649016360455,
+ "grad_norm": 22131.654296875,
+ "learning_rate": 7.234141481317634e-05,
+ "loss": 0.4373,
+ "step": 85800
+ },
+ {
+ "epoch": 0.4429344601462174,
+ "grad_norm": 24779.14453125,
+ "learning_rate": 7.230872826275765e-05,
+ "loss": 0.4347,
+ "step": 85850
+ },
+ {
+ "epoch": 0.4431924301288302,
+ "grad_norm": 22474.443359375,
+ "learning_rate": 7.227602980437179e-05,
+ "loss": 0.4341,
+ "step": 85900
+ },
+ {
+ "epoch": 0.44345040011144304,
+ "grad_norm": 21620.056640625,
+ "learning_rate": 7.224331945547258e-05,
+ "loss": 0.4399,
+ "step": 85950
+ },
+ {
+ "epoch": 0.44370837009405584,
+ "grad_norm": 21546.8046875,
+ "learning_rate": 7.221059723352014e-05,
+ "loss": 0.4437,
+ "step": 86000
+ },
+ {
+ "epoch": 0.4439663400766687,
+ "grad_norm": 22283.0078125,
+ "learning_rate": 7.2177863155981e-05,
+ "loss": 0.4403,
+ "step": 86050
+ },
+ {
+ "epoch": 0.4442243100592815,
+ "grad_norm": 21332.576171875,
+ "learning_rate": 7.214511724032795e-05,
+ "loss": 0.4369,
+ "step": 86100
+ },
+ {
+ "epoch": 0.44448228004189433,
+ "grad_norm": 23106.01953125,
+ "learning_rate": 7.211235950404013e-05,
+ "loss": 0.4369,
+ "step": 86150
+ },
+ {
+ "epoch": 0.4447402500245071,
+ "grad_norm": 21826.2734375,
+ "learning_rate": 7.207958996460298e-05,
+ "loss": 0.4407,
+ "step": 86200
+ },
+ {
+ "epoch": 0.44499822000711997,
+ "grad_norm": 22308.90625,
+ "learning_rate": 7.204680863950825e-05,
+ "loss": 0.4349,
+ "step": 86250
+ },
+ {
+ "epoch": 0.4452561899897328,
+ "grad_norm": 24916.359375,
+ "learning_rate": 7.2014015546254e-05,
+ "loss": 0.436,
+ "step": 86300
+ },
+ {
+ "epoch": 0.4455141599723456,
+ "grad_norm": 22585.77734375,
+ "learning_rate": 7.198121070234453e-05,
+ "loss": 0.4311,
+ "step": 86350
+ },
+ {
+ "epoch": 0.44577212995495846,
+ "grad_norm": 22984.658203125,
+ "learning_rate": 7.194839412529042e-05,
+ "loss": 0.4324,
+ "step": 86400
+ },
+ {
+ "epoch": 0.44603009993757126,
+ "grad_norm": 22495.552734375,
+ "learning_rate": 7.191556583260853e-05,
+ "loss": 0.4306,
+ "step": 86450
+ },
+ {
+ "epoch": 0.4462880699201841,
+ "grad_norm": 21413.2578125,
+ "learning_rate": 7.188272584182196e-05,
+ "loss": 0.4404,
+ "step": 86500
+ },
+ {
+ "epoch": 0.4465460399027969,
+ "grad_norm": 23719.43359375,
+ "learning_rate": 7.184987417046007e-05,
+ "loss": 0.4321,
+ "step": 86550
+ },
+ {
+ "epoch": 0.44680400988540975,
+ "grad_norm": 22586.095703125,
+ "learning_rate": 7.181701083605846e-05,
+ "loss": 0.4349,
+ "step": 86600
+ },
+ {
+ "epoch": 0.44706197986802254,
+ "grad_norm": 20580.166015625,
+ "learning_rate": 7.178413585615891e-05,
+ "loss": 0.4323,
+ "step": 86650
+ },
+ {
+ "epoch": 0.4473199498506354,
+ "grad_norm": 21345.71875,
+ "learning_rate": 7.175124924830948e-05,
+ "loss": 0.4326,
+ "step": 86700
+ },
+ {
+ "epoch": 0.4475779198332482,
+ "grad_norm": 20615.333984375,
+ "learning_rate": 7.171835103006438e-05,
+ "loss": 0.4425,
+ "step": 86750
+ },
+ {
+ "epoch": 0.44783588981586103,
+ "grad_norm": 25518.546875,
+ "learning_rate": 7.168544121898407e-05,
+ "loss": 0.4307,
+ "step": 86800
+ },
+ {
+ "epoch": 0.4480938597984738,
+ "grad_norm": 23149.703125,
+ "learning_rate": 7.165251983263512e-05,
+ "loss": 0.4336,
+ "step": 86850
+ },
+ {
+ "epoch": 0.4483518297810867,
+ "grad_norm": 22026.19140625,
+ "learning_rate": 7.16195868885904e-05,
+ "loss": 0.4401,
+ "step": 86900
+ },
+ {
+ "epoch": 0.44860979976369947,
+ "grad_norm": 21140.90234375,
+ "learning_rate": 7.158664240442881e-05,
+ "loss": 0.436,
+ "step": 86950
+ },
+ {
+ "epoch": 0.4488677697463123,
+ "grad_norm": 25489.1796875,
+ "learning_rate": 7.155368639773552e-05,
+ "loss": 0.4379,
+ "step": 87000
+ },
+ {
+ "epoch": 0.44912573972892517,
+ "grad_norm": 21035.275390625,
+ "learning_rate": 7.152071888610176e-05,
+ "loss": 0.433,
+ "step": 87050
+ },
+ {
+ "epoch": 0.44938370971153796,
+ "grad_norm": 25905.03515625,
+ "learning_rate": 7.148773988712503e-05,
+ "loss": 0.4423,
+ "step": 87100
+ },
+ {
+ "epoch": 0.4496416796941508,
+ "grad_norm": 21237.857421875,
+ "learning_rate": 7.14547494184088e-05,
+ "loss": 0.4346,
+ "step": 87150
+ },
+ {
+ "epoch": 0.4498996496767636,
+ "grad_norm": 19255.748046875,
+ "learning_rate": 7.14217474975628e-05,
+ "loss": 0.4333,
+ "step": 87200
+ },
+ {
+ "epoch": 0.45015761965937645,
+ "grad_norm": 22115.05078125,
+ "learning_rate": 7.138873414220277e-05,
+ "loss": 0.4371,
+ "step": 87250
+ },
+ {
+ "epoch": 0.45041558964198924,
+ "grad_norm": 23271.462890625,
+ "learning_rate": 7.135570936995064e-05,
+ "loss": 0.4362,
+ "step": 87300
+ },
+ {
+ "epoch": 0.4506735596246021,
+ "grad_norm": 24245.02734375,
+ "learning_rate": 7.132267319843438e-05,
+ "loss": 0.4371,
+ "step": 87350
+ },
+ {
+ "epoch": 0.4509315296072149,
+ "grad_norm": 22234.224609375,
+ "learning_rate": 7.128962564528805e-05,
+ "loss": 0.4306,
+ "step": 87400
+ },
+ {
+ "epoch": 0.45118949958982774,
+ "grad_norm": 22704.115234375,
+ "learning_rate": 7.12565667281518e-05,
+ "loss": 0.4408,
+ "step": 87450
+ },
+ {
+ "epoch": 0.45144746957244053,
+ "grad_norm": 21906.650390625,
+ "learning_rate": 7.122349646467183e-05,
+ "loss": 0.4322,
+ "step": 87500
+ },
+ {
+ "epoch": 0.4517054395550534,
+ "grad_norm": 21960.501953125,
+ "learning_rate": 7.119041487250045e-05,
+ "loss": 0.4322,
+ "step": 87550
+ },
+ {
+ "epoch": 0.45196340953766617,
+ "grad_norm": 20264.14453125,
+ "learning_rate": 7.11573219692959e-05,
+ "loss": 0.4403,
+ "step": 87600
+ },
+ {
+ "epoch": 0.452221379520279,
+ "grad_norm": 20237.078125,
+ "learning_rate": 7.112421777272259e-05,
+ "loss": 0.4421,
+ "step": 87650
+ },
+ {
+ "epoch": 0.45247934950289187,
+ "grad_norm": 22111.3203125,
+ "learning_rate": 7.109110230045087e-05,
+ "loss": 0.4386,
+ "step": 87700
+ },
+ {
+ "epoch": 0.45273731948550466,
+ "grad_norm": 20690.015625,
+ "learning_rate": 7.105797557015715e-05,
+ "loss": 0.4315,
+ "step": 87750
+ },
+ {
+ "epoch": 0.4529952894681175,
+ "grad_norm": 23273.888671875,
+ "learning_rate": 7.102483759952384e-05,
+ "loss": 0.4397,
+ "step": 87800
+ },
+ {
+ "epoch": 0.4532532594507303,
+ "grad_norm": 20268.541015625,
+ "learning_rate": 7.099168840623935e-05,
+ "loss": 0.4381,
+ "step": 87850
+ },
+ {
+ "epoch": 0.45351122943334315,
+ "grad_norm": 21591.724609375,
+ "learning_rate": 7.095852800799806e-05,
+ "loss": 0.4368,
+ "step": 87900
+ },
+ {
+ "epoch": 0.45376919941595595,
+ "grad_norm": 20683.994140625,
+ "learning_rate": 7.092535642250035e-05,
+ "loss": 0.4315,
+ "step": 87950
+ },
+ {
+ "epoch": 0.4540271693985688,
+ "grad_norm": 22910.26953125,
+ "learning_rate": 7.089217366745258e-05,
+ "loss": 0.4415,
+ "step": 88000
+ },
+ {
+ "epoch": 0.4542851393811816,
+ "grad_norm": 22321.40234375,
+ "learning_rate": 7.085897976056706e-05,
+ "loss": 0.4386,
+ "step": 88050
+ },
+ {
+ "epoch": 0.45454310936379444,
+ "grad_norm": 20730.521484375,
+ "learning_rate": 7.082577471956206e-05,
+ "loss": 0.4335,
+ "step": 88100
+ },
+ {
+ "epoch": 0.45480107934640723,
+ "grad_norm": 23302.033203125,
+ "learning_rate": 7.079255856216177e-05,
+ "loss": 0.4366,
+ "step": 88150
+ },
+ {
+ "epoch": 0.4550590493290201,
+ "grad_norm": 21125.5625,
+ "learning_rate": 7.075933130609636e-05,
+ "loss": 0.4388,
+ "step": 88200
+ },
+ {
+ "epoch": 0.4553170193116329,
+ "grad_norm": 24245.548828125,
+ "learning_rate": 7.072609296910187e-05,
+ "loss": 0.4369,
+ "step": 88250
+ },
+ {
+ "epoch": 0.4555749892942457,
+ "grad_norm": 19609.1484375,
+ "learning_rate": 7.06928435689203e-05,
+ "loss": 0.4287,
+ "step": 88300
+ },
+ {
+ "epoch": 0.4558329592768585,
+ "grad_norm": 21653.08984375,
+ "learning_rate": 7.065958312329953e-05,
+ "loss": 0.4357,
+ "step": 88350
+ },
+ {
+ "epoch": 0.45609092925947137,
+ "grad_norm": 23725.236328125,
+ "learning_rate": 7.062631164999331e-05,
+ "loss": 0.4382,
+ "step": 88400
+ },
+ {
+ "epoch": 0.4563488992420842,
+ "grad_norm": 21436.92578125,
+ "learning_rate": 7.059302916676137e-05,
+ "loss": 0.4373,
+ "step": 88450
+ },
+ {
+ "epoch": 0.456606869224697,
+ "grad_norm": 20179.189453125,
+ "learning_rate": 7.05597356913692e-05,
+ "loss": 0.4304,
+ "step": 88500
+ },
+ {
+ "epoch": 0.45686483920730986,
+ "grad_norm": 22804.22265625,
+ "learning_rate": 7.052643124158824e-05,
+ "loss": 0.4343,
+ "step": 88550
+ },
+ {
+ "epoch": 0.45712280918992265,
+ "grad_norm": 21530.931640625,
+ "learning_rate": 7.049311583519574e-05,
+ "loss": 0.4364,
+ "step": 88600
+ },
+ {
+ "epoch": 0.4573807791725355,
+ "grad_norm": 21411.646484375,
+ "learning_rate": 7.045978948997486e-05,
+ "loss": 0.436,
+ "step": 88650
+ },
+ {
+ "epoch": 0.4576387491551483,
+ "grad_norm": 20853.962890625,
+ "learning_rate": 7.042645222371451e-05,
+ "loss": 0.436,
+ "step": 88700
+ },
+ {
+ "epoch": 0.45789671913776114,
+ "grad_norm": 20940.28125,
+ "learning_rate": 7.039310405420952e-05,
+ "loss": 0.4349,
+ "step": 88750
+ },
+ {
+ "epoch": 0.45815468912037394,
+ "grad_norm": 22368.05078125,
+ "learning_rate": 7.035974499926045e-05,
+ "loss": 0.4355,
+ "step": 88800
+ },
+ {
+ "epoch": 0.4584126591029868,
+ "grad_norm": 21155.3984375,
+ "learning_rate": 7.032637507667377e-05,
+ "loss": 0.4292,
+ "step": 88850
+ },
+ {
+ "epoch": 0.4586706290855996,
+ "grad_norm": 21627.353515625,
+ "learning_rate": 7.029299430426164e-05,
+ "loss": 0.4404,
+ "step": 88900
+ },
+ {
+ "epoch": 0.45892859906821243,
+ "grad_norm": 22008.23046875,
+ "learning_rate": 7.025960269984212e-05,
+ "loss": 0.431,
+ "step": 88950
+ },
+ {
+ "epoch": 0.4591865690508252,
+ "grad_norm": 21588.109375,
+ "learning_rate": 7.022620028123898e-05,
+ "loss": 0.4319,
+ "step": 89000
+ },
+ {
+ "epoch": 0.45944453903343807,
+ "grad_norm": 21680.646484375,
+ "learning_rate": 7.019278706628179e-05,
+ "loss": 0.4403,
+ "step": 89050
+ },
+ {
+ "epoch": 0.4597025090160509,
+ "grad_norm": 25427.423828125,
+ "learning_rate": 7.015936307280587e-05,
+ "loss": 0.435,
+ "step": 89100
+ },
+ {
+ "epoch": 0.4599604789986637,
+ "grad_norm": 22674.693359375,
+ "learning_rate": 7.01259283186523e-05,
+ "loss": 0.4377,
+ "step": 89150
+ },
+ {
+ "epoch": 0.46021844898127656,
+ "grad_norm": 24841.029296875,
+ "learning_rate": 7.009248282166793e-05,
+ "loss": 0.4387,
+ "step": 89200
+ },
+ {
+ "epoch": 0.46047641896388936,
+ "grad_norm": 21259.369140625,
+ "learning_rate": 7.005902659970528e-05,
+ "loss": 0.4355,
+ "step": 89250
+ },
+ {
+ "epoch": 0.4607343889465022,
+ "grad_norm": 19364.466796875,
+ "learning_rate": 7.002555967062265e-05,
+ "loss": 0.4353,
+ "step": 89300
+ },
+ {
+ "epoch": 0.460992358929115,
+ "grad_norm": 25116.47265625,
+ "learning_rate": 6.999208205228405e-05,
+ "loss": 0.4328,
+ "step": 89350
+ },
+ {
+ "epoch": 0.46125032891172785,
+ "grad_norm": 24426.4296875,
+ "learning_rate": 6.995859376255918e-05,
+ "loss": 0.4331,
+ "step": 89400
+ },
+ {
+ "epoch": 0.46150829889434064,
+ "grad_norm": 20802.759765625,
+ "learning_rate": 6.99250948193234e-05,
+ "loss": 0.4294,
+ "step": 89450
+ },
+ {
+ "epoch": 0.4617662688769535,
+ "grad_norm": 23164.2109375,
+ "learning_rate": 6.989158524045787e-05,
+ "loss": 0.4338,
+ "step": 89500
+ },
+ {
+ "epoch": 0.4620242388595663,
+ "grad_norm": 20543.28515625,
+ "learning_rate": 6.98580650438493e-05,
+ "loss": 0.4243,
+ "step": 89550
+ },
+ {
+ "epoch": 0.46228220884217913,
+ "grad_norm": 22468.732421875,
+ "learning_rate": 6.982453424739016e-05,
+ "loss": 0.4306,
+ "step": 89600
+ },
+ {
+ "epoch": 0.4625401788247919,
+ "grad_norm": 22903.12890625,
+ "learning_rate": 6.979099286897849e-05,
+ "loss": 0.4316,
+ "step": 89650
+ },
+ {
+ "epoch": 0.4627981488074048,
+ "grad_norm": 23074.068359375,
+ "learning_rate": 6.975744092651808e-05,
+ "loss": 0.4371,
+ "step": 89700
+ },
+ {
+ "epoch": 0.46305611879001757,
+ "grad_norm": 22003.00390625,
+ "learning_rate": 6.972387843791827e-05,
+ "loss": 0.4329,
+ "step": 89750
+ },
+ {
+ "epoch": 0.4633140887726304,
+ "grad_norm": 21524.93359375,
+ "learning_rate": 6.969030542109407e-05,
+ "loss": 0.4348,
+ "step": 89800
+ },
+ {
+ "epoch": 0.46357205875524327,
+ "grad_norm": 20501.130859375,
+ "learning_rate": 6.965672189396614e-05,
+ "loss": 0.4286,
+ "step": 89850
+ },
+ {
+ "epoch": 0.46383002873785606,
+ "grad_norm": 21559.396484375,
+ "learning_rate": 6.962312787446068e-05,
+ "loss": 0.434,
+ "step": 89900
+ },
+ {
+ "epoch": 0.4640879987204689,
+ "grad_norm": 21185.537109375,
+ "learning_rate": 6.958952338050955e-05,
+ "loss": 0.4326,
+ "step": 89950
+ },
+ {
+ "epoch": 0.4643459687030817,
+ "grad_norm": 23004.626953125,
+ "learning_rate": 6.955590843005016e-05,
+ "loss": 0.4272,
+ "step": 90000
+ },
+ {
+ "epoch": 0.4643459687030817,
+ "eval_loss": 0.4223860800266266,
+ "eval_runtime": 3251.8949,
+ "eval_samples_per_second": 953.635,
+ "eval_steps_per_second": 1.863,
+ "step": 90000
+ },
+ {
+ "epoch": 0.46460393868569455,
+ "grad_norm": 20333.259765625,
+ "learning_rate": 6.952228304102553e-05,
+ "loss": 0.4338,
+ "step": 90050
+ },
+ {
+ "epoch": 0.46486190866830734,
+ "grad_norm": 25967.029296875,
+ "learning_rate": 6.948864723138423e-05,
+ "loss": 0.4352,
+ "step": 90100
+ },
+ {
+ "epoch": 0.4651198786509202,
+ "grad_norm": 22849.9375,
+ "learning_rate": 6.945500101908043e-05,
+ "loss": 0.4358,
+ "step": 90150
+ },
+ {
+ "epoch": 0.465377848633533,
+ "grad_norm": 20628.9453125,
+ "learning_rate": 6.94213444220738e-05,
+ "loss": 0.4343,
+ "step": 90200
+ },
+ {
+ "epoch": 0.46563581861614584,
+ "grad_norm": 22179.84375,
+ "learning_rate": 6.938767745832959e-05,
+ "loss": 0.4314,
+ "step": 90250
+ },
+ {
+ "epoch": 0.46589378859875863,
+ "grad_norm": 24433.46484375,
+ "learning_rate": 6.935400014581858e-05,
+ "loss": 0.436,
+ "step": 90300
+ },
+ {
+ "epoch": 0.4661517585813715,
+ "grad_norm": 21914.666015625,
+ "learning_rate": 6.932031250251705e-05,
+ "loss": 0.431,
+ "step": 90350
+ },
+ {
+ "epoch": 0.46640972856398427,
+ "grad_norm": 19517.78125,
+ "learning_rate": 6.928661454640683e-05,
+ "loss": 0.4282,
+ "step": 90400
+ },
+ {
+ "epoch": 0.4666676985465971,
+ "grad_norm": 25924.5234375,
+ "learning_rate": 6.925290629547522e-05,
+ "loss": 0.4344,
+ "step": 90450
+ },
+ {
+ "epoch": 0.4669256685292099,
+ "grad_norm": 20866.927734375,
+ "learning_rate": 6.921918776771505e-05,
+ "loss": 0.4336,
+ "step": 90500
+ },
+ {
+ "epoch": 0.46718363851182276,
+ "grad_norm": 22734.5625,
+ "learning_rate": 6.91854589811246e-05,
+ "loss": 0.4375,
+ "step": 90550
+ },
+ {
+ "epoch": 0.4674416084944356,
+ "grad_norm": 21173.5703125,
+ "learning_rate": 6.915171995370766e-05,
+ "loss": 0.428,
+ "step": 90600
+ },
+ {
+ "epoch": 0.4676995784770484,
+ "grad_norm": 23864.681640625,
+ "learning_rate": 6.911797070347346e-05,
+ "loss": 0.4344,
+ "step": 90650
+ },
+ {
+ "epoch": 0.46795754845966125,
+ "grad_norm": 26236.091796875,
+ "learning_rate": 6.908421124843669e-05,
+ "loss": 0.4345,
+ "step": 90700
+ },
+ {
+ "epoch": 0.46821551844227405,
+ "grad_norm": 20788.6015625,
+ "learning_rate": 6.905044160661748e-05,
+ "loss": 0.4332,
+ "step": 90750
+ },
+ {
+ "epoch": 0.4684734884248869,
+ "grad_norm": 21382.2578125,
+ "learning_rate": 6.901666179604148e-05,
+ "loss": 0.4356,
+ "step": 90800
+ },
+ {
+ "epoch": 0.4687314584074997,
+ "grad_norm": 20230.220703125,
+ "learning_rate": 6.898287183473961e-05,
+ "loss": 0.4262,
+ "step": 90850
+ },
+ {
+ "epoch": 0.46898942839011254,
+ "grad_norm": 31838.697265625,
+ "learning_rate": 6.894907174074836e-05,
+ "loss": 0.4316,
+ "step": 90900
+ },
+ {
+ "epoch": 0.46924739837272533,
+ "grad_norm": 21029.5234375,
+ "learning_rate": 6.891526153210953e-05,
+ "loss": 0.4346,
+ "step": 90950
+ },
+ {
+ "epoch": 0.4695053683553382,
+ "grad_norm": 23617.826171875,
+ "learning_rate": 6.888144122687035e-05,
+ "loss": 0.4262,
+ "step": 91000
+ },
+ {
+ "epoch": 0.469763338337951,
+ "grad_norm": 23151.751953125,
+ "learning_rate": 6.884761084308349e-05,
+ "loss": 0.4296,
+ "step": 91050
+ },
+ {
+ "epoch": 0.4700213083205638,
+ "grad_norm": 19649.466796875,
+ "learning_rate": 6.881377039880692e-05,
+ "loss": 0.4325,
+ "step": 91100
+ },
+ {
+ "epoch": 0.4702792783031766,
+ "grad_norm": 20488.10546875,
+ "learning_rate": 6.8779919912104e-05,
+ "loss": 0.4352,
+ "step": 91150
+ },
+ {
+ "epoch": 0.47053724828578947,
+ "grad_norm": 21639.306640625,
+ "learning_rate": 6.874605940104349e-05,
+ "loss": 0.4319,
+ "step": 91200
+ },
+ {
+ "epoch": 0.4707952182684023,
+ "grad_norm": 21799.994140625,
+ "learning_rate": 6.871218888369947e-05,
+ "loss": 0.4315,
+ "step": 91250
+ },
+ {
+ "epoch": 0.4710531882510151,
+ "grad_norm": 22425.94140625,
+ "learning_rate": 6.867830837815137e-05,
+ "loss": 0.4381,
+ "step": 91300
+ },
+ {
+ "epoch": 0.47131115823362796,
+ "grad_norm": 22582.57421875,
+ "learning_rate": 6.864441790248396e-05,
+ "loss": 0.4297,
+ "step": 91350
+ },
+ {
+ "epoch": 0.47156912821624075,
+ "grad_norm": 21082.38671875,
+ "learning_rate": 6.861051747478726e-05,
+ "loss": 0.4292,
+ "step": 91400
+ },
+ {
+ "epoch": 0.4718270981988536,
+ "grad_norm": 23156.5546875,
+ "learning_rate": 6.857660711315672e-05,
+ "loss": 0.4276,
+ "step": 91450
+ },
+ {
+ "epoch": 0.4720850681814664,
+ "grad_norm": 21754.6796875,
+ "learning_rate": 6.854268683569302e-05,
+ "loss": 0.4369,
+ "step": 91500
+ },
+ {
+ "epoch": 0.47234303816407924,
+ "grad_norm": 22397.896484375,
+ "learning_rate": 6.850875666050216e-05,
+ "loss": 0.4312,
+ "step": 91550
+ },
+ {
+ "epoch": 0.47260100814669204,
+ "grad_norm": 21344.166015625,
+ "learning_rate": 6.847481660569537e-05,
+ "loss": 0.4291,
+ "step": 91600
+ },
+ {
+ "epoch": 0.4728589781293049,
+ "grad_norm": 23818.71484375,
+ "learning_rate": 6.844086668938923e-05,
+ "loss": 0.4352,
+ "step": 91650
+ },
+ {
+ "epoch": 0.4731169481119177,
+ "grad_norm": 21734.537109375,
+ "learning_rate": 6.840690692970554e-05,
+ "loss": 0.4326,
+ "step": 91700
+ },
+ {
+ "epoch": 0.47337491809453053,
+ "grad_norm": 22027.734375,
+ "learning_rate": 6.837293734477136e-05,
+ "loss": 0.4369,
+ "step": 91750
+ },
+ {
+ "epoch": 0.4736328880771433,
+ "grad_norm": 23111.103515625,
+ "learning_rate": 6.8338957952719e-05,
+ "loss": 0.4396,
+ "step": 91800
+ },
+ {
+ "epoch": 0.47389085805975617,
+ "grad_norm": 22521.767578125,
+ "learning_rate": 6.830496877168599e-05,
+ "loss": 0.4376,
+ "step": 91850
+ },
+ {
+ "epoch": 0.47414882804236896,
+ "grad_norm": 19730.158203125,
+ "learning_rate": 6.827096981981511e-05,
+ "loss": 0.4321,
+ "step": 91900
+ },
+ {
+ "epoch": 0.4744067980249818,
+ "grad_norm": 21871.134765625,
+ "learning_rate": 6.823696111525433e-05,
+ "loss": 0.4373,
+ "step": 91950
+ },
+ {
+ "epoch": 0.47466476800759466,
+ "grad_norm": 22332.384765625,
+ "learning_rate": 6.820294267615686e-05,
+ "loss": 0.4323,
+ "step": 92000
+ },
+ {
+ "epoch": 0.47492273799020746,
+ "grad_norm": 22426.59765625,
+ "learning_rate": 6.816891452068104e-05,
+ "loss": 0.4272,
+ "step": 92050
+ },
+ {
+ "epoch": 0.4751807079728203,
+ "grad_norm": 23286.05859375,
+ "learning_rate": 6.81348766669905e-05,
+ "loss": 0.4442,
+ "step": 92100
+ },
+ {
+ "epoch": 0.4754386779554331,
+ "grad_norm": 21696.1171875,
+ "learning_rate": 6.810082913325395e-05,
+ "loss": 0.4288,
+ "step": 92150
+ },
+ {
+ "epoch": 0.47569664793804595,
+ "grad_norm": 20548.908203125,
+ "learning_rate": 6.80667719376453e-05,
+ "loss": 0.4358,
+ "step": 92200
+ },
+ {
+ "epoch": 0.47595461792065874,
+ "grad_norm": 22605.1640625,
+ "learning_rate": 6.803270509834363e-05,
+ "loss": 0.4327,
+ "step": 92250
+ },
+ {
+ "epoch": 0.4762125879032716,
+ "grad_norm": 23604.30078125,
+ "learning_rate": 6.799862863353318e-05,
+ "loss": 0.441,
+ "step": 92300
+ },
+ {
+ "epoch": 0.4764705578858844,
+ "grad_norm": 22117.1796875,
+ "learning_rate": 6.796454256140328e-05,
+ "loss": 0.4289,
+ "step": 92350
+ },
+ {
+ "epoch": 0.47672852786849723,
+ "grad_norm": 22476.54296875,
+ "learning_rate": 6.793044690014842e-05,
+ "loss": 0.4319,
+ "step": 92400
+ },
+ {
+ "epoch": 0.47698649785111,
+ "grad_norm": 20855.140625,
+ "learning_rate": 6.789634166796821e-05,
+ "loss": 0.4326,
+ "step": 92450
+ },
+ {
+ "epoch": 0.4772444678337229,
+ "grad_norm": 23704.125,
+ "learning_rate": 6.786222688306734e-05,
+ "loss": 0.4374,
+ "step": 92500
+ },
+ {
+ "epoch": 0.47750243781633567,
+ "grad_norm": 20677.91015625,
+ "learning_rate": 6.782810256365568e-05,
+ "loss": 0.4261,
+ "step": 92550
+ },
+ {
+ "epoch": 0.4777604077989485,
+ "grad_norm": 21245.837890625,
+ "learning_rate": 6.779396872794807e-05,
+ "loss": 0.4309,
+ "step": 92600
+ },
+ {
+ "epoch": 0.4780183777815613,
+ "grad_norm": 25415.859375,
+ "learning_rate": 6.775982539416453e-05,
+ "loss": 0.437,
+ "step": 92650
+ },
+ {
+ "epoch": 0.47827634776417416,
+ "grad_norm": 20582.556640625,
+ "learning_rate": 6.772567258053007e-05,
+ "loss": 0.4349,
+ "step": 92700
+ },
+ {
+ "epoch": 0.478534317746787,
+ "grad_norm": 20002.013671875,
+ "learning_rate": 6.769151030527483e-05,
+ "loss": 0.4263,
+ "step": 92750
+ },
+ {
+ "epoch": 0.4787922877293998,
+ "grad_norm": 23287.6875,
+ "learning_rate": 6.765733858663397e-05,
+ "loss": 0.4332,
+ "step": 92800
+ },
+ {
+ "epoch": 0.47905025771201265,
+ "grad_norm": 22023.66796875,
+ "learning_rate": 6.76231574428477e-05,
+ "loss": 0.4339,
+ "step": 92850
+ },
+ {
+ "epoch": 0.47930822769462544,
+ "grad_norm": 21299.185546875,
+ "learning_rate": 6.758896689216122e-05,
+ "loss": 0.4293,
+ "step": 92900
+ },
+ {
+ "epoch": 0.4795661976772383,
+ "grad_norm": 21979.560546875,
+ "learning_rate": 6.755476695282479e-05,
+ "loss": 0.4314,
+ "step": 92950
+ },
+ {
+ "epoch": 0.4798241676598511,
+ "grad_norm": 21399.029296875,
+ "learning_rate": 6.752055764309372e-05,
+ "loss": 0.4374,
+ "step": 93000
+ },
+ {
+ "epoch": 0.48008213764246394,
+ "grad_norm": 23827.685546875,
+ "learning_rate": 6.748633898122823e-05,
+ "loss": 0.4348,
+ "step": 93050
+ },
+ {
+ "epoch": 0.48034010762507673,
+ "grad_norm": 21079.61328125,
+ "learning_rate": 6.74521109854936e-05,
+ "loss": 0.4312,
+ "step": 93100
+ },
+ {
+ "epoch": 0.4805980776076896,
+ "grad_norm": 20395.04296875,
+ "learning_rate": 6.741787367416006e-05,
+ "loss": 0.4246,
+ "step": 93150
+ },
+ {
+ "epoch": 0.48085604759030237,
+ "grad_norm": 21922.576171875,
+ "learning_rate": 6.738362706550284e-05,
+ "loss": 0.4355,
+ "step": 93200
+ },
+ {
+ "epoch": 0.4811140175729152,
+ "grad_norm": 21317.001953125,
+ "learning_rate": 6.734937117780211e-05,
+ "loss": 0.4302,
+ "step": 93250
+ },
+ {
+ "epoch": 0.481371987555528,
+ "grad_norm": 21387.46484375,
+ "learning_rate": 6.731510602934298e-05,
+ "loss": 0.434,
+ "step": 93300
+ },
+ {
+ "epoch": 0.48162995753814086,
+ "grad_norm": 24289.28515625,
+ "learning_rate": 6.728083163841554e-05,
+ "loss": 0.4338,
+ "step": 93350
+ },
+ {
+ "epoch": 0.4818879275207537,
+ "grad_norm": 23514.162109375,
+ "learning_rate": 6.72465480233148e-05,
+ "loss": 0.4357,
+ "step": 93400
+ },
+ {
+ "epoch": 0.4821458975033665,
+ "grad_norm": 21481.0859375,
+ "learning_rate": 6.721225520234068e-05,
+ "loss": 0.4307,
+ "step": 93450
+ },
+ {
+ "epoch": 0.48240386748597935,
+ "grad_norm": 25044.396484375,
+ "learning_rate": 6.717795319379805e-05,
+ "loss": 0.4335,
+ "step": 93500
+ },
+ {
+ "epoch": 0.48266183746859215,
+ "grad_norm": 21193.333984375,
+ "learning_rate": 6.714364201599662e-05,
+ "loss": 0.4243,
+ "step": 93550
+ },
+ {
+ "epoch": 0.482919807451205,
+ "grad_norm": 19113.275390625,
+ "learning_rate": 6.710932168725105e-05,
+ "loss": 0.4331,
+ "step": 93600
+ },
+ {
+ "epoch": 0.4831777774338178,
+ "grad_norm": 21924.162109375,
+ "learning_rate": 6.707499222588087e-05,
+ "loss": 0.4309,
+ "step": 93650
+ },
+ {
+ "epoch": 0.48343574741643064,
+ "grad_norm": 21123.498046875,
+ "learning_rate": 6.704065365021048e-05,
+ "loss": 0.4392,
+ "step": 93700
+ },
+ {
+ "epoch": 0.48369371739904343,
+ "grad_norm": 22201.29296875,
+ "learning_rate": 6.700630597856914e-05,
+ "loss": 0.4281,
+ "step": 93750
+ },
+ {
+ "epoch": 0.4839516873816563,
+ "grad_norm": 24237.494140625,
+ "learning_rate": 6.697194922929096e-05,
+ "loss": 0.4367,
+ "step": 93800
+ },
+ {
+ "epoch": 0.4842096573642691,
+ "grad_norm": 21306.8125,
+ "learning_rate": 6.693758342071495e-05,
+ "loss": 0.4374,
+ "step": 93850
+ },
+ {
+ "epoch": 0.4844676273468819,
+ "grad_norm": 22120.75,
+ "learning_rate": 6.690320857118488e-05,
+ "loss": 0.4309,
+ "step": 93900
+ },
+ {
+ "epoch": 0.4847255973294947,
+ "grad_norm": 20799.59765625,
+ "learning_rate": 6.686882469904939e-05,
+ "loss": 0.4262,
+ "step": 93950
+ },
+ {
+ "epoch": 0.48498356731210757,
+ "grad_norm": 22964.642578125,
+ "learning_rate": 6.683443182266192e-05,
+ "loss": 0.4338,
+ "step": 94000
+ },
+ {
+ "epoch": 0.48524153729472036,
+ "grad_norm": 22017.076171875,
+ "learning_rate": 6.68000299603807e-05,
+ "loss": 0.4317,
+ "step": 94050
+ },
+ {
+ "epoch": 0.4854995072773332,
+ "grad_norm": 21423.890625,
+ "learning_rate": 6.676561913056884e-05,
+ "loss": 0.4329,
+ "step": 94100
+ },
+ {
+ "epoch": 0.48575747725994606,
+ "grad_norm": 22123.390625,
+ "learning_rate": 6.67311993515941e-05,
+ "loss": 0.4309,
+ "step": 94150
+ },
+ {
+ "epoch": 0.48601544724255885,
+ "grad_norm": 23107.208984375,
+ "learning_rate": 6.669677064182915e-05,
+ "loss": 0.4316,
+ "step": 94200
+ },
+ {
+ "epoch": 0.4862734172251717,
+ "grad_norm": 21250.33203125,
+ "learning_rate": 6.666233301965132e-05,
+ "loss": 0.4289,
+ "step": 94250
+ },
+ {
+ "epoch": 0.4865313872077845,
+ "grad_norm": 21629.720703125,
+ "learning_rate": 6.66278865034428e-05,
+ "loss": 0.4301,
+ "step": 94300
+ },
+ {
+ "epoch": 0.48678935719039734,
+ "grad_norm": 23665.4609375,
+ "learning_rate": 6.659343111159043e-05,
+ "loss": 0.4267,
+ "step": 94350
+ },
+ {
+ "epoch": 0.48704732717301014,
+ "grad_norm": 23254.232421875,
+ "learning_rate": 6.655896686248583e-05,
+ "loss": 0.4266,
+ "step": 94400
+ },
+ {
+ "epoch": 0.487305297155623,
+ "grad_norm": 22491.404296875,
+ "learning_rate": 6.652449377452539e-05,
+ "loss": 0.4278,
+ "step": 94450
+ },
+ {
+ "epoch": 0.4875632671382358,
+ "grad_norm": 21071.74609375,
+ "learning_rate": 6.649001186611015e-05,
+ "loss": 0.4308,
+ "step": 94500
+ },
+ {
+ "epoch": 0.4878212371208486,
+ "grad_norm": 20860.861328125,
+ "learning_rate": 6.64555211556459e-05,
+ "loss": 0.4308,
+ "step": 94550
+ },
+ {
+ "epoch": 0.4880792071034614,
+ "grad_norm": 21733.033203125,
+ "learning_rate": 6.642102166154308e-05,
+ "loss": 0.4376,
+ "step": 94600
+ },
+ {
+ "epoch": 0.48833717708607427,
+ "grad_norm": 22799.3984375,
+ "learning_rate": 6.638651340221687e-05,
+ "loss": 0.4289,
+ "step": 94650
+ },
+ {
+ "epoch": 0.48859514706868706,
+ "grad_norm": 21678.296875,
+ "learning_rate": 6.635199639608709e-05,
+ "loss": 0.4301,
+ "step": 94700
+ },
+ {
+ "epoch": 0.4888531170512999,
+ "grad_norm": 20510.052734375,
+ "learning_rate": 6.631747066157831e-05,
+ "loss": 0.4276,
+ "step": 94750
+ },
+ {
+ "epoch": 0.48911108703391276,
+ "grad_norm": 21075.474609375,
+ "learning_rate": 6.628293621711964e-05,
+ "loss": 0.435,
+ "step": 94800
+ },
+ {
+ "epoch": 0.48936905701652555,
+ "grad_norm": 22063.083984375,
+ "learning_rate": 6.624839308114492e-05,
+ "loss": 0.434,
+ "step": 94850
+ },
+ {
+ "epoch": 0.4896270269991384,
+ "grad_norm": 20185.99609375,
+ "learning_rate": 6.621384127209261e-05,
+ "loss": 0.4246,
+ "step": 94900
+ },
+ {
+ "epoch": 0.4898849969817512,
+ "grad_norm": 22002.326171875,
+ "learning_rate": 6.61792808084058e-05,
+ "loss": 0.4272,
+ "step": 94950
+ },
+ {
+ "epoch": 0.49014296696436405,
+ "grad_norm": 22271.25,
+ "learning_rate": 6.614471170853218e-05,
+ "loss": 0.4323,
+ "step": 95000
+ },
+ {
+ "epoch": 0.49014296696436405,
+ "eval_loss": 0.4187907576560974,
+ "eval_runtime": 3274.3922,
+ "eval_samples_per_second": 947.083,
+ "eval_steps_per_second": 1.85,
+ "step": 95000
+ },
+ {
+ "epoch": 0.49040093694697684,
+ "grad_norm": 20668.224609375,
+ "learning_rate": 6.611013399092406e-05,
+ "loss": 0.4285,
+ "step": 95050
+ },
+ {
+ "epoch": 0.4906589069295897,
+ "grad_norm": 20890.05078125,
+ "learning_rate": 6.607554767403838e-05,
+ "loss": 0.4333,
+ "step": 95100
+ },
+ {
+ "epoch": 0.4909168769122025,
+ "grad_norm": 22767.6875,
+ "learning_rate": 6.604095277633664e-05,
+ "loss": 0.4284,
+ "step": 95150
+ },
+ {
+ "epoch": 0.49117484689481533,
+ "grad_norm": 22603.083984375,
+ "learning_rate": 6.600634931628493e-05,
+ "loss": 0.4332,
+ "step": 95200
+ },
+ {
+ "epoch": 0.4914328168774281,
+ "grad_norm": 25005.8984375,
+ "learning_rate": 6.597173731235388e-05,
+ "loss": 0.4284,
+ "step": 95250
+ },
+ {
+ "epoch": 0.491690786860041,
+ "grad_norm": 23687.4765625,
+ "learning_rate": 6.593711678301874e-05,
+ "loss": 0.4316,
+ "step": 95300
+ },
+ {
+ "epoch": 0.49194875684265377,
+ "grad_norm": 19670.087890625,
+ "learning_rate": 6.590248774675926e-05,
+ "loss": 0.4326,
+ "step": 95350
+ },
+ {
+ "epoch": 0.4922067268252666,
+ "grad_norm": 23065.818359375,
+ "learning_rate": 6.586785022205977e-05,
+ "loss": 0.4316,
+ "step": 95400
+ },
+ {
+ "epoch": 0.4924646968078794,
+ "grad_norm": 21279.01953125,
+ "learning_rate": 6.583320422740909e-05,
+ "loss": 0.4278,
+ "step": 95450
+ },
+ {
+ "epoch": 0.49272266679049226,
+ "grad_norm": 19707.6328125,
+ "learning_rate": 6.579854978130057e-05,
+ "loss": 0.4272,
+ "step": 95500
+ },
+ {
+ "epoch": 0.4929806367731051,
+ "grad_norm": 22938.3515625,
+ "learning_rate": 6.57638869022321e-05,
+ "loss": 0.4316,
+ "step": 95550
+ },
+ {
+ "epoch": 0.4932386067557179,
+ "grad_norm": 24812.65625,
+ "learning_rate": 6.572921560870607e-05,
+ "loss": 0.4315,
+ "step": 95600
+ },
+ {
+ "epoch": 0.49349657673833075,
+ "grad_norm": 21462.873046875,
+ "learning_rate": 6.569453591922931e-05,
+ "loss": 0.4299,
+ "step": 95650
+ },
+ {
+ "epoch": 0.49375454672094354,
+ "grad_norm": 22590.384765625,
+ "learning_rate": 6.565984785231318e-05,
+ "loss": 0.4294,
+ "step": 95700
+ },
+ {
+ "epoch": 0.4940125167035564,
+ "grad_norm": 23677.619140625,
+ "learning_rate": 6.56251514264735e-05,
+ "loss": 0.4379,
+ "step": 95750
+ },
+ {
+ "epoch": 0.4942704866861692,
+ "grad_norm": 22078.87109375,
+ "learning_rate": 6.559044666023057e-05,
+ "loss": 0.4276,
+ "step": 95800
+ },
+ {
+ "epoch": 0.49452845666878203,
+ "grad_norm": 22440.369140625,
+ "learning_rate": 6.55557335721091e-05,
+ "loss": 0.4279,
+ "step": 95850
+ },
+ {
+ "epoch": 0.49478642665139483,
+ "grad_norm": 24544.12109375,
+ "learning_rate": 6.552101218063826e-05,
+ "loss": 0.4305,
+ "step": 95900
+ },
+ {
+ "epoch": 0.4950443966340077,
+ "grad_norm": 21647.107421875,
+ "learning_rate": 6.548628250435167e-05,
+ "loss": 0.4328,
+ "step": 95950
+ },
+ {
+ "epoch": 0.49530236661662047,
+ "grad_norm": 21392.28125,
+ "learning_rate": 6.545154456178735e-05,
+ "loss": 0.4299,
+ "step": 96000
+ },
+ {
+ "epoch": 0.4955603365992333,
+ "grad_norm": 19458.55078125,
+ "learning_rate": 6.541679837148775e-05,
+ "loss": 0.4375,
+ "step": 96050
+ },
+ {
+ "epoch": 0.4958183065818461,
+ "grad_norm": 21774.14453125,
+ "learning_rate": 6.53820439519997e-05,
+ "loss": 0.4348,
+ "step": 96100
+ },
+ {
+ "epoch": 0.49607627656445896,
+ "grad_norm": 22902.63671875,
+ "learning_rate": 6.534728132187444e-05,
+ "loss": 0.4297,
+ "step": 96150
+ },
+ {
+ "epoch": 0.49633424654707176,
+ "grad_norm": 20869.306640625,
+ "learning_rate": 6.531251049966762e-05,
+ "loss": 0.4313,
+ "step": 96200
+ },
+ {
+ "epoch": 0.4965922165296846,
+ "grad_norm": 23554.537109375,
+ "learning_rate": 6.527773150393919e-05,
+ "loss": 0.4313,
+ "step": 96250
+ },
+ {
+ "epoch": 0.49685018651229745,
+ "grad_norm": 23000.92578125,
+ "learning_rate": 6.524294435325351e-05,
+ "loss": 0.4266,
+ "step": 96300
+ },
+ {
+ "epoch": 0.49710815649491025,
+ "grad_norm": 21331.72265625,
+ "learning_rate": 6.52081490661793e-05,
+ "loss": 0.4261,
+ "step": 96350
+ },
+ {
+ "epoch": 0.4973661264775231,
+ "grad_norm": 22540.75,
+ "learning_rate": 6.517334566128961e-05,
+ "loss": 0.4282,
+ "step": 96400
+ },
+ {
+ "epoch": 0.4976240964601359,
+ "grad_norm": 21733.560546875,
+ "learning_rate": 6.51385341571618e-05,
+ "loss": 0.43,
+ "step": 96450
+ },
+ {
+ "epoch": 0.49788206644274874,
+ "grad_norm": 23288.21875,
+ "learning_rate": 6.510371457237765e-05,
+ "loss": 0.4306,
+ "step": 96500
+ },
+ {
+ "epoch": 0.49814003642536153,
+ "grad_norm": 24475.9453125,
+ "learning_rate": 6.506888692552309e-05,
+ "loss": 0.4299,
+ "step": 96550
+ },
+ {
+ "epoch": 0.4983980064079744,
+ "grad_norm": 20756.5078125,
+ "learning_rate": 6.503405123518847e-05,
+ "loss": 0.4292,
+ "step": 96600
+ },
+ {
+ "epoch": 0.4986559763905872,
+ "grad_norm": 21059.365234375,
+ "learning_rate": 6.499920751996845e-05,
+ "loss": 0.4261,
+ "step": 96650
+ },
+ {
+ "epoch": 0.4989139463732,
+ "grad_norm": 22173.65625,
+ "learning_rate": 6.496435579846188e-05,
+ "loss": 0.4309,
+ "step": 96700
+ },
+ {
+ "epoch": 0.4991719163558128,
+ "grad_norm": 23941.49609375,
+ "learning_rate": 6.492949608927196e-05,
+ "loss": 0.4355,
+ "step": 96750
+ },
+ {
+ "epoch": 0.49942988633842567,
+ "grad_norm": 22027.400390625,
+ "learning_rate": 6.489462841100611e-05,
+ "loss": 0.433,
+ "step": 96800
+ },
+ {
+ "epoch": 0.49968785632103846,
+ "grad_norm": 21414.77734375,
+ "learning_rate": 6.485975278227605e-05,
+ "loss": 0.4291,
+ "step": 96850
+ },
+ {
+ "epoch": 0.4999458263036513,
+ "grad_norm": 23023.60546875,
+ "learning_rate": 6.482486922169767e-05,
+ "loss": 0.4309,
+ "step": 96900
+ },
+ {
+ "epoch": 0.5002037962862641,
+ "grad_norm": 23856.318359375,
+ "learning_rate": 6.478997774789119e-05,
+ "loss": 0.4314,
+ "step": 96950
+ },
+ {
+ "epoch": 0.500461766268877,
+ "grad_norm": 21834.822265625,
+ "learning_rate": 6.475507837948096e-05,
+ "loss": 0.4319,
+ "step": 97000
+ },
+ {
+ "epoch": 0.5007197362514898,
+ "grad_norm": 22487.779296875,
+ "learning_rate": 6.472017113509561e-05,
+ "loss": 0.4281,
+ "step": 97050
+ },
+ {
+ "epoch": 0.5009777062341026,
+ "grad_norm": 23955.73046875,
+ "learning_rate": 6.468525603336796e-05,
+ "loss": 0.4324,
+ "step": 97100
+ },
+ {
+ "epoch": 0.5012356762167154,
+ "grad_norm": 23631.203125,
+ "learning_rate": 6.4650333092935e-05,
+ "loss": 0.4333,
+ "step": 97150
+ },
+ {
+ "epoch": 0.5014936461993282,
+ "grad_norm": 21347.26953125,
+ "learning_rate": 6.461540233243792e-05,
+ "loss": 0.421,
+ "step": 97200
+ },
+ {
+ "epoch": 0.5017516161819411,
+ "grad_norm": 23590.9140625,
+ "learning_rate": 6.458046377052209e-05,
+ "loss": 0.4347,
+ "step": 97250
+ },
+ {
+ "epoch": 0.5020095861645539,
+ "grad_norm": 23192.708984375,
+ "learning_rate": 6.454551742583703e-05,
+ "loss": 0.4363,
+ "step": 97300
+ },
+ {
+ "epoch": 0.5022675561471667,
+ "grad_norm": 23588.974609375,
+ "learning_rate": 6.451056331703643e-05,
+ "loss": 0.4268,
+ "step": 97350
+ },
+ {
+ "epoch": 0.5025255261297795,
+ "grad_norm": 19536.3046875,
+ "learning_rate": 6.44756014627781e-05,
+ "loss": 0.4268,
+ "step": 97400
+ },
+ {
+ "epoch": 0.5027834961123924,
+ "grad_norm": 20248.345703125,
+ "learning_rate": 6.444063188172401e-05,
+ "loss": 0.4286,
+ "step": 97450
+ },
+ {
+ "epoch": 0.5030414660950052,
+ "grad_norm": 21598.1171875,
+ "learning_rate": 6.440565459254027e-05,
+ "loss": 0.4302,
+ "step": 97500
+ },
+ {
+ "epoch": 0.503299436077618,
+ "grad_norm": 25492.541015625,
+ "learning_rate": 6.437066961389704e-05,
+ "loss": 0.4223,
+ "step": 97550
+ },
+ {
+ "epoch": 0.5035574060602308,
+ "grad_norm": 22227.8125,
+ "learning_rate": 6.433567696446865e-05,
+ "loss": 0.4194,
+ "step": 97600
+ },
+ {
+ "epoch": 0.5038153760428437,
+ "grad_norm": 23799.134765625,
+ "learning_rate": 6.430067666293348e-05,
+ "loss": 0.4239,
+ "step": 97650
+ },
+ {
+ "epoch": 0.5040733460254565,
+ "grad_norm": 25147.080078125,
+ "learning_rate": 6.426566872797403e-05,
+ "loss": 0.4369,
+ "step": 97700
+ },
+ {
+ "epoch": 0.5043313160080694,
+ "grad_norm": 22497.68359375,
+ "learning_rate": 6.423065317827686e-05,
+ "loss": 0.4332,
+ "step": 97750
+ },
+ {
+ "epoch": 0.5045892859906821,
+ "grad_norm": 23273.966796875,
+ "learning_rate": 6.419563003253258e-05,
+ "loss": 0.4331,
+ "step": 97800
+ },
+ {
+ "epoch": 0.5048472559732949,
+ "grad_norm": 21943.7734375,
+ "learning_rate": 6.416059930943585e-05,
+ "loss": 0.4331,
+ "step": 97850
+ },
+ {
+ "epoch": 0.5051052259559078,
+ "grad_norm": 23134.685546875,
+ "learning_rate": 6.412556102768544e-05,
+ "loss": 0.4283,
+ "step": 97900
+ },
+ {
+ "epoch": 0.5053631959385206,
+ "grad_norm": 21504.177734375,
+ "learning_rate": 6.409051520598405e-05,
+ "loss": 0.4319,
+ "step": 97950
+ },
+ {
+ "epoch": 0.5056211659211334,
+ "grad_norm": 25481.029296875,
+ "learning_rate": 6.405546186303852e-05,
+ "loss": 0.4268,
+ "step": 98000
+ },
+ {
+ "epoch": 0.5058791359037462,
+ "grad_norm": 21170.70703125,
+ "learning_rate": 6.402040101755961e-05,
+ "loss": 0.4253,
+ "step": 98050
+ },
+ {
+ "epoch": 0.5061371058863591,
+ "grad_norm": 20005.333984375,
+ "learning_rate": 6.398533268826212e-05,
+ "loss": 0.4267,
+ "step": 98100
+ },
+ {
+ "epoch": 0.5063950758689719,
+ "grad_norm": 20913.32421875,
+ "learning_rate": 6.395025689386485e-05,
+ "loss": 0.4245,
+ "step": 98150
+ },
+ {
+ "epoch": 0.5066530458515847,
+ "grad_norm": 24310.720703125,
+ "learning_rate": 6.391517365309059e-05,
+ "loss": 0.4246,
+ "step": 98200
+ },
+ {
+ "epoch": 0.5069110158341975,
+ "grad_norm": 21981.455078125,
+ "learning_rate": 6.388008298466607e-05,
+ "loss": 0.4286,
+ "step": 98250
+ },
+ {
+ "epoch": 0.5071689858168104,
+ "grad_norm": 23764.30078125,
+ "learning_rate": 6.384498490732202e-05,
+ "loss": 0.4282,
+ "step": 98300
+ },
+ {
+ "epoch": 0.5074269557994232,
+ "grad_norm": 20518.447265625,
+ "learning_rate": 6.380987943979314e-05,
+ "loss": 0.4333,
+ "step": 98350
+ },
+ {
+ "epoch": 0.5076849257820361,
+ "grad_norm": 23327.80859375,
+ "learning_rate": 6.377476660081803e-05,
+ "loss": 0.4255,
+ "step": 98400
+ },
+ {
+ "epoch": 0.5079428957646488,
+ "grad_norm": 19600.84375,
+ "learning_rate": 6.373964640913924e-05,
+ "loss": 0.4277,
+ "step": 98450
+ },
+ {
+ "epoch": 0.5082008657472616,
+ "grad_norm": 23252.146484375,
+ "learning_rate": 6.370451888350322e-05,
+ "loss": 0.4311,
+ "step": 98500
+ },
+ {
+ "epoch": 0.5084588357298745,
+ "grad_norm": 21930.736328125,
+ "learning_rate": 6.366938404266041e-05,
+ "loss": 0.4329,
+ "step": 98550
+ },
+ {
+ "epoch": 0.5087168057124873,
+ "grad_norm": 21249.69140625,
+ "learning_rate": 6.36342419053651e-05,
+ "loss": 0.4257,
+ "step": 98600
+ },
+ {
+ "epoch": 0.5089747756951001,
+ "grad_norm": 21809.4609375,
+ "learning_rate": 6.359909249037548e-05,
+ "loss": 0.431,
+ "step": 98650
+ },
+ {
+ "epoch": 0.5092327456777129,
+ "grad_norm": 23142.6796875,
+ "learning_rate": 6.356393581645359e-05,
+ "loss": 0.4329,
+ "step": 98700
+ },
+ {
+ "epoch": 0.5094907156603258,
+ "grad_norm": 21783.541015625,
+ "learning_rate": 6.352877190236542e-05,
+ "loss": 0.4362,
+ "step": 98750
+ },
+ {
+ "epoch": 0.5097486856429386,
+ "grad_norm": 22534.080078125,
+ "learning_rate": 6.349360076688079e-05,
+ "loss": 0.4302,
+ "step": 98800
+ },
+ {
+ "epoch": 0.5100066556255514,
+ "grad_norm": 22630.03515625,
+ "learning_rate": 6.345842242877336e-05,
+ "loss": 0.4314,
+ "step": 98850
+ },
+ {
+ "epoch": 0.5102646256081642,
+ "grad_norm": 23446.0390625,
+ "learning_rate": 6.342323690682064e-05,
+ "loss": 0.428,
+ "step": 98900
+ },
+ {
+ "epoch": 0.5105225955907771,
+ "grad_norm": 25644.2734375,
+ "learning_rate": 6.338804421980398e-05,
+ "loss": 0.4219,
+ "step": 98950
+ },
+ {
+ "epoch": 0.5107805655733899,
+ "grad_norm": 23159.580078125,
+ "learning_rate": 6.335284438650856e-05,
+ "loss": 0.434,
+ "step": 99000
+ },
+ {
+ "epoch": 0.5110385355560026,
+ "grad_norm": 23536.556640625,
+ "learning_rate": 6.331763742572337e-05,
+ "loss": 0.4293,
+ "step": 99050
+ },
+ {
+ "epoch": 0.5112965055386155,
+ "grad_norm": 23240.662109375,
+ "learning_rate": 6.328242335624121e-05,
+ "loss": 0.434,
+ "step": 99100
+ },
+ {
+ "epoch": 0.5115544755212283,
+ "grad_norm": 22368.94921875,
+ "learning_rate": 6.324720219685866e-05,
+ "loss": 0.4295,
+ "step": 99150
+ },
+ {
+ "epoch": 0.5118124455038412,
+ "grad_norm": 23257.068359375,
+ "learning_rate": 6.321197396637608e-05,
+ "loss": 0.4198,
+ "step": 99200
+ },
+ {
+ "epoch": 0.512070415486454,
+ "grad_norm": 21806.6953125,
+ "learning_rate": 6.317673868359765e-05,
+ "loss": 0.4241,
+ "step": 99250
+ },
+ {
+ "epoch": 0.5123283854690668,
+ "grad_norm": 24117.416015625,
+ "learning_rate": 6.314149636733125e-05,
+ "loss": 0.4261,
+ "step": 99300
+ },
+ {
+ "epoch": 0.5125863554516796,
+ "grad_norm": 25262.626953125,
+ "learning_rate": 6.310624703638858e-05,
+ "loss": 0.4234,
+ "step": 99350
+ },
+ {
+ "epoch": 0.5128443254342925,
+ "grad_norm": 22739.923828125,
+ "learning_rate": 6.3070990709585e-05,
+ "loss": 0.4299,
+ "step": 99400
+ },
+ {
+ "epoch": 0.5131022954169053,
+ "grad_norm": 20651.646484375,
+ "learning_rate": 6.303572740573971e-05,
+ "loss": 0.4307,
+ "step": 99450
+ },
+ {
+ "epoch": 0.5133602653995181,
+ "grad_norm": 22125.037109375,
+ "learning_rate": 6.300045714367555e-05,
+ "loss": 0.4216,
+ "step": 99500
+ },
+ {
+ "epoch": 0.5136182353821309,
+ "grad_norm": 22210.080078125,
+ "learning_rate": 6.29651799422191e-05,
+ "loss": 0.429,
+ "step": 99550
+ },
+ {
+ "epoch": 0.5138762053647438,
+ "grad_norm": 23850.673828125,
+ "learning_rate": 6.292989582020063e-05,
+ "loss": 0.4337,
+ "step": 99600
+ },
+ {
+ "epoch": 0.5141341753473566,
+ "grad_norm": 21346.251953125,
+ "learning_rate": 6.289460479645417e-05,
+ "loss": 0.4352,
+ "step": 99650
+ },
+ {
+ "epoch": 0.5143921453299694,
+ "grad_norm": 22687.080078125,
+ "learning_rate": 6.285930688981735e-05,
+ "loss": 0.433,
+ "step": 99700
+ },
+ {
+ "epoch": 0.5146501153125822,
+ "grad_norm": 20447.666015625,
+ "learning_rate": 6.282400211913154e-05,
+ "loss": 0.4288,
+ "step": 99750
+ },
+ {
+ "epoch": 0.514908085295195,
+ "grad_norm": 21768.51953125,
+ "learning_rate": 6.278869050324168e-05,
+ "loss": 0.4363,
+ "step": 99800
+ },
+ {
+ "epoch": 0.5151660552778079,
+ "grad_norm": 21896.47265625,
+ "learning_rate": 6.27533720609965e-05,
+ "loss": 0.4307,
+ "step": 99850
+ },
+ {
+ "epoch": 0.5154240252604207,
+ "grad_norm": 22967.384765625,
+ "learning_rate": 6.271804681124827e-05,
+ "loss": 0.4295,
+ "step": 99900
+ },
+ {
+ "epoch": 0.5156819952430335,
+ "grad_norm": 20233.869140625,
+ "learning_rate": 6.268271477285292e-05,
+ "loss": 0.4329,
+ "step": 99950
+ },
+ {
+ "epoch": 0.5159399652256463,
+ "grad_norm": 20550.060546875,
+ "learning_rate": 6.264737596466998e-05,
+ "loss": 0.4267,
+ "step": 100000
+ },
+ {
+ "epoch": 0.5159399652256463,
+ "eval_loss": 0.4161209166049957,
+ "eval_runtime": 2887.0736,
+ "eval_samples_per_second": 1074.14,
+ "eval_steps_per_second": 2.098,
+ "step": 100000
+ },
+ {
+ "epoch": 0.5161979352082592,
+ "grad_norm": 22327.767578125,
+ "learning_rate": 6.261203040556267e-05,
+ "loss": 0.4272,
+ "step": 100050
+ },
+ {
+ "epoch": 0.516455905190872,
+ "grad_norm": 22512.1640625,
+ "learning_rate": 6.257667811439776e-05,
+ "loss": 0.4267,
+ "step": 100100
+ },
+ {
+ "epoch": 0.5167138751734848,
+ "grad_norm": 22710.8828125,
+ "learning_rate": 6.254131911004561e-05,
+ "loss": 0.42,
+ "step": 100150
+ },
+ {
+ "epoch": 0.5169718451560976,
+ "grad_norm": 21731.365234375,
+ "learning_rate": 6.250595341138014e-05,
+ "loss": 0.4259,
+ "step": 100200
+ },
+ {
+ "epoch": 0.5172298151387105,
+ "grad_norm": 21478.970703125,
+ "learning_rate": 6.247058103727892e-05,
+ "loss": 0.4217,
+ "step": 100250
+ },
+ {
+ "epoch": 0.5174877851213233,
+ "grad_norm": 22431.939453125,
+ "learning_rate": 6.243520200662303e-05,
+ "loss": 0.4272,
+ "step": 100300
+ },
+ {
+ "epoch": 0.5177457551039361,
+ "grad_norm": 22137.5078125,
+ "learning_rate": 6.239981633829709e-05,
+ "loss": 0.4301,
+ "step": 100350
+ },
+ {
+ "epoch": 0.5180037250865489,
+ "grad_norm": 22802.220703125,
+ "learning_rate": 6.23644240511893e-05,
+ "loss": 0.4346,
+ "step": 100400
+ },
+ {
+ "epoch": 0.5182616950691618,
+ "grad_norm": 20567.640625,
+ "learning_rate": 6.232902516419137e-05,
+ "loss": 0.4271,
+ "step": 100450
+ },
+ {
+ "epoch": 0.5185196650517746,
+ "grad_norm": 20855.70703125,
+ "learning_rate": 6.229361969619855e-05,
+ "loss": 0.4237,
+ "step": 100500
+ },
+ {
+ "epoch": 0.5187776350343875,
+ "grad_norm": 22052.44921875,
+ "learning_rate": 6.225820766610958e-05,
+ "loss": 0.4324,
+ "step": 100550
+ },
+ {
+ "epoch": 0.5190356050170002,
+ "grad_norm": 21984.818359375,
+ "learning_rate": 6.222278909282674e-05,
+ "loss": 0.4315,
+ "step": 100600
+ },
+ {
+ "epoch": 0.519293574999613,
+ "grad_norm": 22044.8359375,
+ "learning_rate": 6.218736399525575e-05,
+ "loss": 0.4324,
+ "step": 100650
+ },
+ {
+ "epoch": 0.5195515449822259,
+ "grad_norm": 22661.78515625,
+ "learning_rate": 6.215193239230586e-05,
+ "loss": 0.4273,
+ "step": 100700
+ },
+ {
+ "epoch": 0.5198095149648387,
+ "grad_norm": 22091.01171875,
+ "learning_rate": 6.211649430288976e-05,
+ "loss": 0.4252,
+ "step": 100750
+ },
+ {
+ "epoch": 0.5200674849474515,
+ "grad_norm": 22164.376953125,
+ "learning_rate": 6.208104974592364e-05,
+ "loss": 0.4272,
+ "step": 100800
+ },
+ {
+ "epoch": 0.5203254549300643,
+ "grad_norm": 23387.287109375,
+ "learning_rate": 6.20455987403271e-05,
+ "loss": 0.4281,
+ "step": 100850
+ },
+ {
+ "epoch": 0.5205834249126772,
+ "grad_norm": 22505.326171875,
+ "learning_rate": 6.201014130502317e-05,
+ "loss": 0.4285,
+ "step": 100900
+ },
+ {
+ "epoch": 0.52084139489529,
+ "grad_norm": 21150.341796875,
+ "learning_rate": 6.19746774589384e-05,
+ "loss": 0.4274,
+ "step": 100950
+ },
+ {
+ "epoch": 0.5210993648779028,
+ "grad_norm": 23076.650390625,
+ "learning_rate": 6.193920722100268e-05,
+ "loss": 0.4289,
+ "step": 101000
+ },
+ {
+ "epoch": 0.5213573348605156,
+ "grad_norm": 20890.41796875,
+ "learning_rate": 6.190373061014932e-05,
+ "loss": 0.4305,
+ "step": 101050
+ },
+ {
+ "epoch": 0.5216153048431285,
+ "grad_norm": 22231.6328125,
+ "learning_rate": 6.186824764531507e-05,
+ "loss": 0.4304,
+ "step": 101100
+ },
+ {
+ "epoch": 0.5218732748257413,
+ "grad_norm": 22094.197265625,
+ "learning_rate": 6.183275834544005e-05,
+ "loss": 0.4279,
+ "step": 101150
+ },
+ {
+ "epoch": 0.522131244808354,
+ "grad_norm": 23188.353515625,
+ "learning_rate": 6.179726272946774e-05,
+ "loss": 0.4272,
+ "step": 101200
+ },
+ {
+ "epoch": 0.5223892147909669,
+ "grad_norm": 22908.5,
+ "learning_rate": 6.176176081634504e-05,
+ "loss": 0.4229,
+ "step": 101250
+ },
+ {
+ "epoch": 0.5226471847735797,
+ "grad_norm": 21536.37109375,
+ "learning_rate": 6.172625262502215e-05,
+ "loss": 0.4267,
+ "step": 101300
+ },
+ {
+ "epoch": 0.5229051547561926,
+ "grad_norm": 22923.38671875,
+ "learning_rate": 6.169073817445268e-05,
+ "loss": 0.4256,
+ "step": 101350
+ },
+ {
+ "epoch": 0.5231631247388054,
+ "grad_norm": 22802.669921875,
+ "learning_rate": 6.165521748359356e-05,
+ "loss": 0.4241,
+ "step": 101400
+ },
+ {
+ "epoch": 0.5234210947214182,
+ "grad_norm": 22852.59765625,
+ "learning_rate": 6.161969057140504e-05,
+ "loss": 0.4275,
+ "step": 101450
+ },
+ {
+ "epoch": 0.523679064704031,
+ "grad_norm": 27410.056640625,
+ "learning_rate": 6.158415745685068e-05,
+ "loss": 0.4316,
+ "step": 101500
+ },
+ {
+ "epoch": 0.5239370346866439,
+ "grad_norm": 21783.482421875,
+ "learning_rate": 6.15486181588974e-05,
+ "loss": 0.4235,
+ "step": 101550
+ },
+ {
+ "epoch": 0.5241950046692567,
+ "grad_norm": 21013.259765625,
+ "learning_rate": 6.151307269651536e-05,
+ "loss": 0.426,
+ "step": 101600
+ },
+ {
+ "epoch": 0.5244529746518695,
+ "grad_norm": 23852.673828125,
+ "learning_rate": 6.147752108867807e-05,
+ "loss": 0.4226,
+ "step": 101650
+ },
+ {
+ "epoch": 0.5247109446344823,
+ "grad_norm": 24846.427734375,
+ "learning_rate": 6.144196335436225e-05,
+ "loss": 0.4277,
+ "step": 101700
+ },
+ {
+ "epoch": 0.5249689146170952,
+ "grad_norm": 21197.177734375,
+ "learning_rate": 6.140639951254796e-05,
+ "loss": 0.4247,
+ "step": 101750
+ },
+ {
+ "epoch": 0.525226884599708,
+ "grad_norm": 24620.37890625,
+ "learning_rate": 6.137082958221848e-05,
+ "loss": 0.429,
+ "step": 101800
+ },
+ {
+ "epoch": 0.5254848545823207,
+ "grad_norm": 22811.875,
+ "learning_rate": 6.133525358236036e-05,
+ "loss": 0.4274,
+ "step": 101850
+ },
+ {
+ "epoch": 0.5257428245649336,
+ "grad_norm": 20224.125,
+ "learning_rate": 6.129967153196336e-05,
+ "loss": 0.4338,
+ "step": 101900
+ },
+ {
+ "epoch": 0.5260007945475464,
+ "grad_norm": 21489.734375,
+ "learning_rate": 6.126408345002052e-05,
+ "loss": 0.4333,
+ "step": 101950
+ },
+ {
+ "epoch": 0.5262587645301593,
+ "grad_norm": 21771.20703125,
+ "learning_rate": 6.122848935552804e-05,
+ "loss": 0.4258,
+ "step": 102000
+ },
+ {
+ "epoch": 0.5265167345127721,
+ "grad_norm": 23362.43359375,
+ "learning_rate": 6.119288926748537e-05,
+ "loss": 0.4234,
+ "step": 102050
+ },
+ {
+ "epoch": 0.5267747044953849,
+ "grad_norm": 20869.46484375,
+ "learning_rate": 6.115728320489516e-05,
+ "loss": 0.4233,
+ "step": 102100
+ },
+ {
+ "epoch": 0.5270326744779977,
+ "grad_norm": 21146.568359375,
+ "learning_rate": 6.11216711867632e-05,
+ "loss": 0.4243,
+ "step": 102150
+ },
+ {
+ "epoch": 0.5272906444606106,
+ "grad_norm": 24031.97265625,
+ "learning_rate": 6.108605323209853e-05,
+ "loss": 0.4334,
+ "step": 102200
+ },
+ {
+ "epoch": 0.5275486144432234,
+ "grad_norm": 23461.306640625,
+ "learning_rate": 6.10504293599133e-05,
+ "loss": 0.4289,
+ "step": 102250
+ },
+ {
+ "epoch": 0.5278065844258362,
+ "grad_norm": 21013.169921875,
+ "learning_rate": 6.101479958922287e-05,
+ "loss": 0.4334,
+ "step": 102300
+ },
+ {
+ "epoch": 0.528064554408449,
+ "grad_norm": 23328.306640625,
+ "learning_rate": 6.0979163939045716e-05,
+ "loss": 0.4285,
+ "step": 102350
+ },
+ {
+ "epoch": 0.5283225243910619,
+ "grad_norm": 21542.20703125,
+ "learning_rate": 6.094352242840343e-05,
+ "loss": 0.4321,
+ "step": 102400
+ },
+ {
+ "epoch": 0.5285804943736747,
+ "grad_norm": 20556.357421875,
+ "learning_rate": 6.09078750763208e-05,
+ "loss": 0.4255,
+ "step": 102450
+ },
+ {
+ "epoch": 0.5288384643562875,
+ "grad_norm": 24925.21875,
+ "learning_rate": 6.0872221901825666e-05,
+ "loss": 0.4225,
+ "step": 102500
+ },
+ {
+ "epoch": 0.5290964343389003,
+ "grad_norm": 22750.419921875,
+ "learning_rate": 6.0836562923949016e-05,
+ "loss": 0.4287,
+ "step": 102550
+ },
+ {
+ "epoch": 0.5293544043215132,
+ "grad_norm": 21514.8984375,
+ "learning_rate": 6.080089816172489e-05,
+ "loss": 0.4254,
+ "step": 102600
+ },
+ {
+ "epoch": 0.529612374304126,
+ "grad_norm": 23347.03125,
+ "learning_rate": 6.07652276341905e-05,
+ "loss": 0.4346,
+ "step": 102650
+ },
+ {
+ "epoch": 0.5298703442867388,
+ "grad_norm": 23180.916015625,
+ "learning_rate": 6.072955136038604e-05,
+ "loss": 0.4244,
+ "step": 102700
+ },
+ {
+ "epoch": 0.5301283142693516,
+ "grad_norm": 20701.431640625,
+ "learning_rate": 6.069386935935484e-05,
+ "loss": 0.43,
+ "step": 102750
+ },
+ {
+ "epoch": 0.5303862842519644,
+ "grad_norm": 23350.99609375,
+ "learning_rate": 6.0658181650143245e-05,
+ "loss": 0.4217,
+ "step": 102800
+ },
+ {
+ "epoch": 0.5306442542345773,
+ "grad_norm": 21068.111328125,
+ "learning_rate": 6.062248825180066e-05,
+ "loss": 0.4278,
+ "step": 102850
+ },
+ {
+ "epoch": 0.5309022242171901,
+ "grad_norm": 23415.25,
+ "learning_rate": 6.0586789183379554e-05,
+ "loss": 0.4331,
+ "step": 102900
+ },
+ {
+ "epoch": 0.5311601941998029,
+ "grad_norm": 22186.048828125,
+ "learning_rate": 6.055108446393538e-05,
+ "loss": 0.4327,
+ "step": 102950
+ },
+ {
+ "epoch": 0.5314181641824157,
+ "grad_norm": 20644.166015625,
+ "learning_rate": 6.051537411252662e-05,
+ "loss": 0.4264,
+ "step": 103000
+ },
+ {
+ "epoch": 0.5316761341650286,
+ "grad_norm": 21755.712890625,
+ "learning_rate": 6.047965814821478e-05,
+ "loss": 0.4253,
+ "step": 103050
+ },
+ {
+ "epoch": 0.5319341041476414,
+ "grad_norm": 22319.177734375,
+ "learning_rate": 6.044393659006435e-05,
+ "loss": 0.4238,
+ "step": 103100
+ },
+ {
+ "epoch": 0.5321920741302542,
+ "grad_norm": 22544.064453125,
+ "learning_rate": 6.040820945714281e-05,
+ "loss": 0.4306,
+ "step": 103150
+ },
+ {
+ "epoch": 0.532450044112867,
+ "grad_norm": 21484.53125,
+ "learning_rate": 6.037247676852059e-05,
+ "loss": 0.4254,
+ "step": 103200
+ },
+ {
+ "epoch": 0.5327080140954799,
+ "grad_norm": 23923.201171875,
+ "learning_rate": 6.033673854327114e-05,
+ "loss": 0.4258,
+ "step": 103250
+ },
+ {
+ "epoch": 0.5329659840780927,
+ "grad_norm": 20412.08984375,
+ "learning_rate": 6.03009948004708e-05,
+ "loss": 0.4286,
+ "step": 103300
+ },
+ {
+ "epoch": 0.5332239540607056,
+ "grad_norm": 19932.908203125,
+ "learning_rate": 6.026524555919891e-05,
+ "loss": 0.4367,
+ "step": 103350
+ },
+ {
+ "epoch": 0.5334819240433183,
+ "grad_norm": 21761.033203125,
+ "learning_rate": 6.022949083853772e-05,
+ "loss": 0.4272,
+ "step": 103400
+ },
+ {
+ "epoch": 0.5337398940259311,
+ "grad_norm": 23392.29296875,
+ "learning_rate": 6.019373065757239e-05,
+ "loss": 0.4274,
+ "step": 103450
+ },
+ {
+ "epoch": 0.533997864008544,
+ "grad_norm": 26151.69921875,
+ "learning_rate": 6.015796503539103e-05,
+ "loss": 0.4189,
+ "step": 103500
+ },
+ {
+ "epoch": 0.5342558339911568,
+ "grad_norm": 22503.529296875,
+ "learning_rate": 6.012219399108463e-05,
+ "loss": 0.428,
+ "step": 103550
+ },
+ {
+ "epoch": 0.5345138039737696,
+ "grad_norm": 25906.685546875,
+ "learning_rate": 6.008641754374709e-05,
+ "loss": 0.4287,
+ "step": 103600
+ },
+ {
+ "epoch": 0.5347717739563824,
+ "grad_norm": 23784.685546875,
+ "learning_rate": 6.005063571247517e-05,
+ "loss": 0.4276,
+ "step": 103650
+ },
+ {
+ "epoch": 0.5350297439389953,
+ "grad_norm": 21574.30078125,
+ "learning_rate": 6.0014848516368515e-05,
+ "loss": 0.4344,
+ "step": 103700
+ },
+ {
+ "epoch": 0.5352877139216081,
+ "grad_norm": 22296.921875,
+ "learning_rate": 5.9979055974529675e-05,
+ "loss": 0.4322,
+ "step": 103750
+ },
+ {
+ "epoch": 0.5355456839042209,
+ "grad_norm": 21478.611328125,
+ "learning_rate": 5.994325810606397e-05,
+ "loss": 0.429,
+ "step": 103800
+ },
+ {
+ "epoch": 0.5358036538868337,
+ "grad_norm": 22572.37109375,
+ "learning_rate": 5.9907454930079645e-05,
+ "loss": 0.4281,
+ "step": 103850
+ },
+ {
+ "epoch": 0.5360616238694466,
+ "grad_norm": 23416.80859375,
+ "learning_rate": 5.98716464656877e-05,
+ "loss": 0.4266,
+ "step": 103900
+ },
+ {
+ "epoch": 0.5363195938520594,
+ "grad_norm": 23470.626953125,
+ "learning_rate": 5.983583273200204e-05,
+ "loss": 0.426,
+ "step": 103950
+ },
+ {
+ "epoch": 0.5365775638346721,
+ "grad_norm": 24464.38671875,
+ "learning_rate": 5.980001374813933e-05,
+ "loss": 0.4218,
+ "step": 104000
+ },
+ {
+ "epoch": 0.536835533817285,
+ "grad_norm": 23835.29296875,
+ "learning_rate": 5.976418953321904e-05,
+ "loss": 0.4261,
+ "step": 104050
+ },
+ {
+ "epoch": 0.5370935037998978,
+ "grad_norm": 23344.654296875,
+ "learning_rate": 5.972836010636346e-05,
+ "loss": 0.4292,
+ "step": 104100
+ },
+ {
+ "epoch": 0.5373514737825107,
+ "grad_norm": 23925.935546875,
+ "learning_rate": 5.9692525486697616e-05,
+ "loss": 0.4323,
+ "step": 104150
+ },
+ {
+ "epoch": 0.5376094437651235,
+ "grad_norm": 23155.76953125,
+ "learning_rate": 5.965668569334937e-05,
+ "loss": 0.428,
+ "step": 104200
+ },
+ {
+ "epoch": 0.5378674137477363,
+ "grad_norm": 22334.19921875,
+ "learning_rate": 5.962084074544928e-05,
+ "loss": 0.4129,
+ "step": 104250
+ },
+ {
+ "epoch": 0.5381253837303491,
+ "grad_norm": 20239.66796875,
+ "learning_rate": 5.95849906621307e-05,
+ "loss": 0.4335,
+ "step": 104300
+ },
+ {
+ "epoch": 0.538383353712962,
+ "grad_norm": 22626.19140625,
+ "learning_rate": 5.9549135462529704e-05,
+ "loss": 0.4274,
+ "step": 104350
+ },
+ {
+ "epoch": 0.5386413236955748,
+ "grad_norm": 21798.65625,
+ "learning_rate": 5.951327516578512e-05,
+ "loss": 0.4258,
+ "step": 104400
+ },
+ {
+ "epoch": 0.5388992936781876,
+ "grad_norm": 21796.7421875,
+ "learning_rate": 5.947740979103845e-05,
+ "loss": 0.4263,
+ "step": 104450
+ },
+ {
+ "epoch": 0.5391572636608004,
+ "grad_norm": 22380.21484375,
+ "learning_rate": 5.944153935743396e-05,
+ "loss": 0.4218,
+ "step": 104500
+ },
+ {
+ "epoch": 0.5394152336434133,
+ "grad_norm": 22526.4296875,
+ "learning_rate": 5.940566388411859e-05,
+ "loss": 0.4233,
+ "step": 104550
+ },
+ {
+ "epoch": 0.5396732036260261,
+ "grad_norm": 22876.5703125,
+ "learning_rate": 5.936978339024195e-05,
+ "loss": 0.4296,
+ "step": 104600
+ },
+ {
+ "epoch": 0.5399311736086388,
+ "grad_norm": 22592.654296875,
+ "learning_rate": 5.9333897894956394e-05,
+ "loss": 0.4287,
+ "step": 104650
+ },
+ {
+ "epoch": 0.5401891435912517,
+ "grad_norm": 21235.43359375,
+ "learning_rate": 5.929800741741688e-05,
+ "loss": 0.4269,
+ "step": 104700
+ },
+ {
+ "epoch": 0.5404471135738645,
+ "grad_norm": 22049.05859375,
+ "learning_rate": 5.926211197678104e-05,
+ "loss": 0.4266,
+ "step": 104750
+ },
+ {
+ "epoch": 0.5407050835564774,
+ "grad_norm": 23252.845703125,
+ "learning_rate": 5.922621159220918e-05,
+ "loss": 0.4223,
+ "step": 104800
+ },
+ {
+ "epoch": 0.5409630535390902,
+ "grad_norm": 20577.1796875,
+ "learning_rate": 5.919030628286424e-05,
+ "loss": 0.4302,
+ "step": 104850
+ },
+ {
+ "epoch": 0.541221023521703,
+ "grad_norm": 24854.8671875,
+ "learning_rate": 5.915439606791174e-05,
+ "loss": 0.4212,
+ "step": 104900
+ },
+ {
+ "epoch": 0.5414789935043158,
+ "grad_norm": 22561.552734375,
+ "learning_rate": 5.9118480966519906e-05,
+ "loss": 0.4196,
+ "step": 104950
+ },
+ {
+ "epoch": 0.5417369634869287,
+ "grad_norm": 23885.4765625,
+ "learning_rate": 5.9082560997859496e-05,
+ "loss": 0.421,
+ "step": 105000
+ },
+ {
+ "epoch": 0.5417369634869287,
+ "eval_loss": 0.4132173955440521,
+ "eval_runtime": 2876.3365,
+ "eval_samples_per_second": 1078.149,
+ "eval_steps_per_second": 2.106,
+ "step": 105000
+ },
+ {
+ "epoch": 0.5419949334695415,
+ "grad_norm": 20974.994140625,
+ "learning_rate": 5.90466361811039e-05,
+ "loss": 0.4228,
+ "step": 105050
+ },
+ {
+ "epoch": 0.5422529034521543,
+ "grad_norm": 24338.412109375,
+ "learning_rate": 5.9010706535429086e-05,
+ "loss": 0.4215,
+ "step": 105100
+ },
+ {
+ "epoch": 0.5425108734347671,
+ "grad_norm": 20734.796875,
+ "learning_rate": 5.8974772080013605e-05,
+ "loss": 0.4319,
+ "step": 105150
+ },
+ {
+ "epoch": 0.54276884341738,
+ "grad_norm": 21026.123046875,
+ "learning_rate": 5.8938832834038574e-05,
+ "loss": 0.4318,
+ "step": 105200
+ },
+ {
+ "epoch": 0.5430268133999928,
+ "grad_norm": 20023.287109375,
+ "learning_rate": 5.890288881668766e-05,
+ "loss": 0.4306,
+ "step": 105250
+ },
+ {
+ "epoch": 0.5432847833826056,
+ "grad_norm": 23171.42578125,
+ "learning_rate": 5.88669400471471e-05,
+ "loss": 0.4237,
+ "step": 105300
+ },
+ {
+ "epoch": 0.5435427533652184,
+ "grad_norm": 21692.109375,
+ "learning_rate": 5.8830986544605635e-05,
+ "loss": 0.4261,
+ "step": 105350
+ },
+ {
+ "epoch": 0.5438007233478312,
+ "grad_norm": 22358.216796875,
+ "learning_rate": 5.8795028328254566e-05,
+ "loss": 0.4204,
+ "step": 105400
+ },
+ {
+ "epoch": 0.5440586933304441,
+ "grad_norm": 22529.650390625,
+ "learning_rate": 5.875906541728766e-05,
+ "loss": 0.422,
+ "step": 105450
+ },
+ {
+ "epoch": 0.544316663313057,
+ "grad_norm": 18307.05859375,
+ "learning_rate": 5.8723097830901264e-05,
+ "loss": 0.4236,
+ "step": 105500
+ },
+ {
+ "epoch": 0.5445746332956697,
+ "grad_norm": 22356.583984375,
+ "learning_rate": 5.8687125588294154e-05,
+ "loss": 0.4213,
+ "step": 105550
+ },
+ {
+ "epoch": 0.5448326032782825,
+ "grad_norm": 21446.732421875,
+ "learning_rate": 5.8651148708667625e-05,
+ "loss": 0.4216,
+ "step": 105600
+ },
+ {
+ "epoch": 0.5450905732608954,
+ "grad_norm": 24014.49609375,
+ "learning_rate": 5.8615167211225416e-05,
+ "loss": 0.4283,
+ "step": 105650
+ },
+ {
+ "epoch": 0.5453485432435082,
+ "grad_norm": 22394.306640625,
+ "learning_rate": 5.8579181115173785e-05,
+ "loss": 0.4242,
+ "step": 105700
+ },
+ {
+ "epoch": 0.545606513226121,
+ "grad_norm": 25348.26171875,
+ "learning_rate": 5.8543190439721405e-05,
+ "loss": 0.4234,
+ "step": 105750
+ },
+ {
+ "epoch": 0.5458644832087338,
+ "grad_norm": 22638.720703125,
+ "learning_rate": 5.850719520407939e-05,
+ "loss": 0.4269,
+ "step": 105800
+ },
+ {
+ "epoch": 0.5461224531913467,
+ "grad_norm": 22702.841796875,
+ "learning_rate": 5.847119542746131e-05,
+ "loss": 0.4201,
+ "step": 105850
+ },
+ {
+ "epoch": 0.5463804231739595,
+ "grad_norm": 22299.849609375,
+ "learning_rate": 5.843519112908315e-05,
+ "loss": 0.4243,
+ "step": 105900
+ },
+ {
+ "epoch": 0.5466383931565723,
+ "grad_norm": 21965.283203125,
+ "learning_rate": 5.8399182328163304e-05,
+ "loss": 0.4209,
+ "step": 105950
+ },
+ {
+ "epoch": 0.5468963631391851,
+ "grad_norm": 22101.755859375,
+ "learning_rate": 5.836316904392256e-05,
+ "loss": 0.4254,
+ "step": 106000
+ },
+ {
+ "epoch": 0.547154333121798,
+ "grad_norm": 22735.970703125,
+ "learning_rate": 5.8327151295584126e-05,
+ "loss": 0.4251,
+ "step": 106050
+ },
+ {
+ "epoch": 0.5474123031044108,
+ "grad_norm": 24287.58203125,
+ "learning_rate": 5.829112910237359e-05,
+ "loss": 0.427,
+ "step": 106100
+ },
+ {
+ "epoch": 0.5476702730870235,
+ "grad_norm": 22509.02734375,
+ "learning_rate": 5.825510248351889e-05,
+ "loss": 0.4209,
+ "step": 106150
+ },
+ {
+ "epoch": 0.5479282430696364,
+ "grad_norm": 22325.32421875,
+ "learning_rate": 5.821907145825032e-05,
+ "loss": 0.4276,
+ "step": 106200
+ },
+ {
+ "epoch": 0.5481862130522492,
+ "grad_norm": 21362.255859375,
+ "learning_rate": 5.8183036045800556e-05,
+ "loss": 0.4273,
+ "step": 106250
+ },
+ {
+ "epoch": 0.5484441830348621,
+ "grad_norm": 22934.61328125,
+ "learning_rate": 5.814699626540461e-05,
+ "loss": 0.4318,
+ "step": 106300
+ },
+ {
+ "epoch": 0.5487021530174749,
+ "grad_norm": 23663.65625,
+ "learning_rate": 5.8110952136299814e-05,
+ "loss": 0.4246,
+ "step": 106350
+ },
+ {
+ "epoch": 0.5489601230000877,
+ "grad_norm": 20743.84765625,
+ "learning_rate": 5.807490367772584e-05,
+ "loss": 0.4289,
+ "step": 106400
+ },
+ {
+ "epoch": 0.5492180929827005,
+ "grad_norm": 20859.244140625,
+ "learning_rate": 5.8038850908924636e-05,
+ "loss": 0.4255,
+ "step": 106450
+ },
+ {
+ "epoch": 0.5494760629653134,
+ "grad_norm": 21824.990234375,
+ "learning_rate": 5.800279384914047e-05,
+ "loss": 0.4311,
+ "step": 106500
+ },
+ {
+ "epoch": 0.5497340329479262,
+ "grad_norm": 19514.681640625,
+ "learning_rate": 5.7966732517619926e-05,
+ "loss": 0.4311,
+ "step": 106550
+ },
+ {
+ "epoch": 0.549992002930539,
+ "grad_norm": 24263.765625,
+ "learning_rate": 5.7930666933611835e-05,
+ "loss": 0.4257,
+ "step": 106600
+ },
+ {
+ "epoch": 0.5502499729131518,
+ "grad_norm": 23152.279296875,
+ "learning_rate": 5.789459711636729e-05,
+ "loss": 0.4226,
+ "step": 106650
+ },
+ {
+ "epoch": 0.5505079428957647,
+ "grad_norm": 21756.8671875,
+ "learning_rate": 5.785852308513967e-05,
+ "loss": 0.4266,
+ "step": 106700
+ },
+ {
+ "epoch": 0.5507659128783775,
+ "grad_norm": 20913.3125,
+ "learning_rate": 5.78224448591846e-05,
+ "loss": 0.4228,
+ "step": 106750
+ },
+ {
+ "epoch": 0.5510238828609902,
+ "grad_norm": 24674.92578125,
+ "learning_rate": 5.778636245775996e-05,
+ "loss": 0.4246,
+ "step": 106800
+ },
+ {
+ "epoch": 0.5512818528436031,
+ "grad_norm": 24229.4296875,
+ "learning_rate": 5.775027590012579e-05,
+ "loss": 0.4244,
+ "step": 106850
+ },
+ {
+ "epoch": 0.5515398228262159,
+ "grad_norm": 21722.048828125,
+ "learning_rate": 5.771418520554443e-05,
+ "loss": 0.4264,
+ "step": 106900
+ },
+ {
+ "epoch": 0.5517977928088288,
+ "grad_norm": 22060.224609375,
+ "learning_rate": 5.7678090393280384e-05,
+ "loss": 0.4268,
+ "step": 106950
+ },
+ {
+ "epoch": 0.5520557627914416,
+ "grad_norm": 25690.306640625,
+ "learning_rate": 5.7641991482600366e-05,
+ "loss": 0.4298,
+ "step": 107000
+ },
+ {
+ "epoch": 0.5523137327740544,
+ "grad_norm": 24629.115234375,
+ "learning_rate": 5.7605888492773266e-05,
+ "loss": 0.4223,
+ "step": 107050
+ },
+ {
+ "epoch": 0.5525717027566672,
+ "grad_norm": 23552.78515625,
+ "learning_rate": 5.756978144307018e-05,
+ "loss": 0.4246,
+ "step": 107100
+ },
+ {
+ "epoch": 0.5528296727392801,
+ "grad_norm": 21611.703125,
+ "learning_rate": 5.753367035276431e-05,
+ "loss": 0.4173,
+ "step": 107150
+ },
+ {
+ "epoch": 0.5530876427218929,
+ "grad_norm": 24158.64453125,
+ "learning_rate": 5.749755524113111e-05,
+ "loss": 0.4211,
+ "step": 107200
+ },
+ {
+ "epoch": 0.5533456127045057,
+ "grad_norm": 23446.94140625,
+ "learning_rate": 5.746143612744811e-05,
+ "loss": 0.4262,
+ "step": 107250
+ },
+ {
+ "epoch": 0.5536035826871185,
+ "grad_norm": 21608.703125,
+ "learning_rate": 5.742531303099498e-05,
+ "loss": 0.424,
+ "step": 107300
+ },
+ {
+ "epoch": 0.5538615526697314,
+ "grad_norm": 25070.78125,
+ "learning_rate": 5.738918597105353e-05,
+ "loss": 0.4219,
+ "step": 107350
+ },
+ {
+ "epoch": 0.5541195226523442,
+ "grad_norm": 21161.5234375,
+ "learning_rate": 5.735305496690769e-05,
+ "loss": 0.4293,
+ "step": 107400
+ },
+ {
+ "epoch": 0.554377492634957,
+ "grad_norm": 23108.521484375,
+ "learning_rate": 5.7316920037843516e-05,
+ "loss": 0.427,
+ "step": 107450
+ },
+ {
+ "epoch": 0.5546354626175698,
+ "grad_norm": 22233.87890625,
+ "learning_rate": 5.728078120314909e-05,
+ "loss": 0.4204,
+ "step": 107500
+ },
+ {
+ "epoch": 0.5548934326001826,
+ "grad_norm": 24522.310546875,
+ "learning_rate": 5.724463848211464e-05,
+ "loss": 0.4257,
+ "step": 107550
+ },
+ {
+ "epoch": 0.5551514025827955,
+ "grad_norm": 22916.892578125,
+ "learning_rate": 5.720849189403244e-05,
+ "loss": 0.4251,
+ "step": 107600
+ },
+ {
+ "epoch": 0.5554093725654083,
+ "grad_norm": 20069.236328125,
+ "learning_rate": 5.7172341458196876e-05,
+ "loss": 0.424,
+ "step": 107650
+ },
+ {
+ "epoch": 0.5556673425480211,
+ "grad_norm": 25173.86328125,
+ "learning_rate": 5.713618719390432e-05,
+ "loss": 0.4294,
+ "step": 107700
+ },
+ {
+ "epoch": 0.5559253125306339,
+ "grad_norm": 21957.373046875,
+ "learning_rate": 5.710002912045323e-05,
+ "loss": 0.4239,
+ "step": 107750
+ },
+ {
+ "epoch": 0.5561832825132468,
+ "grad_norm": 20540.82421875,
+ "learning_rate": 5.706386725714407e-05,
+ "loss": 0.4259,
+ "step": 107800
+ },
+ {
+ "epoch": 0.5564412524958596,
+ "grad_norm": 22470.4921875,
+ "learning_rate": 5.702770162327936e-05,
+ "loss": 0.4273,
+ "step": 107850
+ },
+ {
+ "epoch": 0.5566992224784724,
+ "grad_norm": 21721.197265625,
+ "learning_rate": 5.69915322381636e-05,
+ "loss": 0.4233,
+ "step": 107900
+ },
+ {
+ "epoch": 0.5569571924610852,
+ "grad_norm": 21666.955078125,
+ "learning_rate": 5.6955359121103324e-05,
+ "loss": 0.4283,
+ "step": 107950
+ },
+ {
+ "epoch": 0.5572151624436981,
+ "grad_norm": 20970.9296875,
+ "learning_rate": 5.6919182291407014e-05,
+ "loss": 0.4275,
+ "step": 108000
+ },
+ {
+ "epoch": 0.5574731324263109,
+ "grad_norm": 22353.306640625,
+ "learning_rate": 5.688300176838518e-05,
+ "loss": 0.4244,
+ "step": 108050
+ },
+ {
+ "epoch": 0.5577311024089237,
+ "grad_norm": 22607.357421875,
+ "learning_rate": 5.68468175713503e-05,
+ "loss": 0.4252,
+ "step": 108100
+ },
+ {
+ "epoch": 0.5579890723915365,
+ "grad_norm": 21949.1015625,
+ "learning_rate": 5.681062971961677e-05,
+ "loss": 0.4194,
+ "step": 108150
+ },
+ {
+ "epoch": 0.5582470423741493,
+ "grad_norm": 23135.21875,
+ "learning_rate": 5.677443823250099e-05,
+ "loss": 0.425,
+ "step": 108200
+ },
+ {
+ "epoch": 0.5585050123567622,
+ "grad_norm": 19050.34765625,
+ "learning_rate": 5.673824312932123e-05,
+ "loss": 0.422,
+ "step": 108250
+ },
+ {
+ "epoch": 0.5587629823393749,
+ "grad_norm": 22969.15625,
+ "learning_rate": 5.67020444293978e-05,
+ "loss": 0.4253,
+ "step": 108300
+ },
+ {
+ "epoch": 0.5590209523219878,
+ "grad_norm": 22808.203125,
+ "learning_rate": 5.666584215205282e-05,
+ "loss": 0.4261,
+ "step": 108350
+ },
+ {
+ "epoch": 0.5592789223046006,
+ "grad_norm": 23061.126953125,
+ "learning_rate": 5.662963631661038e-05,
+ "loss": 0.4248,
+ "step": 108400
+ },
+ {
+ "epoch": 0.5595368922872135,
+ "grad_norm": 24134.693359375,
+ "learning_rate": 5.659342694239642e-05,
+ "loss": 0.4273,
+ "step": 108450
+ },
+ {
+ "epoch": 0.5597948622698263,
+ "grad_norm": 23659.2578125,
+ "learning_rate": 5.655721404873886e-05,
+ "loss": 0.427,
+ "step": 108500
+ },
+ {
+ "epoch": 0.5600528322524391,
+ "grad_norm": 20205.1953125,
+ "learning_rate": 5.652099765496741e-05,
+ "loss": 0.4257,
+ "step": 108550
+ },
+ {
+ "epoch": 0.5603108022350519,
+ "grad_norm": 21324.837890625,
+ "learning_rate": 5.6484777780413686e-05,
+ "loss": 0.4248,
+ "step": 108600
+ },
+ {
+ "epoch": 0.5605687722176648,
+ "grad_norm": 21779.849609375,
+ "learning_rate": 5.644855444441114e-05,
+ "loss": 0.4259,
+ "step": 108650
+ },
+ {
+ "epoch": 0.5608267422002776,
+ "grad_norm": 20502.0859375,
+ "learning_rate": 5.641232766629512e-05,
+ "loss": 0.4281,
+ "step": 108700
+ },
+ {
+ "epoch": 0.5610847121828904,
+ "grad_norm": 23600.5859375,
+ "learning_rate": 5.637609746540276e-05,
+ "loss": 0.4183,
+ "step": 108750
+ },
+ {
+ "epoch": 0.5613426821655032,
+ "grad_norm": 22977.41015625,
+ "learning_rate": 5.633986386107302e-05,
+ "loss": 0.4219,
+ "step": 108800
+ },
+ {
+ "epoch": 0.561600652148116,
+ "grad_norm": 23411.263671875,
+ "learning_rate": 5.630362687264672e-05,
+ "loss": 0.4268,
+ "step": 108850
+ },
+ {
+ "epoch": 0.5618586221307289,
+ "grad_norm": 20194.060546875,
+ "learning_rate": 5.6267386519466446e-05,
+ "loss": 0.4175,
+ "step": 108900
+ },
+ {
+ "epoch": 0.5621165921133416,
+ "grad_norm": 19387.88671875,
+ "learning_rate": 5.623114282087664e-05,
+ "loss": 0.4274,
+ "step": 108950
+ },
+ {
+ "epoch": 0.5623745620959545,
+ "grad_norm": 23158.28125,
+ "learning_rate": 5.619489579622343e-05,
+ "loss": 0.4222,
+ "step": 109000
+ },
+ {
+ "epoch": 0.5626325320785673,
+ "grad_norm": 23551.431640625,
+ "learning_rate": 5.6158645464854817e-05,
+ "loss": 0.428,
+ "step": 109050
+ },
+ {
+ "epoch": 0.5628905020611802,
+ "grad_norm": 23904.896484375,
+ "learning_rate": 5.6122391846120495e-05,
+ "loss": 0.4252,
+ "step": 109100
+ },
+ {
+ "epoch": 0.563148472043793,
+ "grad_norm": 21354.61328125,
+ "learning_rate": 5.608613495937197e-05,
+ "loss": 0.4202,
+ "step": 109150
+ },
+ {
+ "epoch": 0.5634064420264058,
+ "grad_norm": 23561.978515625,
+ "learning_rate": 5.6049874823962456e-05,
+ "loss": 0.4301,
+ "step": 109200
+ },
+ {
+ "epoch": 0.5636644120090186,
+ "grad_norm": 20979.53515625,
+ "learning_rate": 5.601361145924692e-05,
+ "loss": 0.4204,
+ "step": 109250
+ },
+ {
+ "epoch": 0.5639223819916315,
+ "grad_norm": 24039.125,
+ "learning_rate": 5.5977344884582e-05,
+ "loss": 0.4284,
+ "step": 109300
+ },
+ {
+ "epoch": 0.5641803519742443,
+ "grad_norm": 22242.35546875,
+ "learning_rate": 5.594107511932615e-05,
+ "loss": 0.4248,
+ "step": 109350
+ },
+ {
+ "epoch": 0.5644383219568571,
+ "grad_norm": 20016.1875,
+ "learning_rate": 5.5904802182839434e-05,
+ "loss": 0.4222,
+ "step": 109400
+ },
+ {
+ "epoch": 0.5646962919394699,
+ "grad_norm": 22243.0703125,
+ "learning_rate": 5.5868526094483666e-05,
+ "loss": 0.4276,
+ "step": 109450
+ },
+ {
+ "epoch": 0.5649542619220828,
+ "grad_norm": 23286.38671875,
+ "learning_rate": 5.58322468736223e-05,
+ "loss": 0.4208,
+ "step": 109500
+ },
+ {
+ "epoch": 0.5652122319046956,
+ "grad_norm": 21801.802734375,
+ "learning_rate": 5.579596453962047e-05,
+ "loss": 0.4275,
+ "step": 109550
+ },
+ {
+ "epoch": 0.5654702018873083,
+ "grad_norm": 23282.025390625,
+ "learning_rate": 5.575967911184502e-05,
+ "loss": 0.4255,
+ "step": 109600
+ },
+ {
+ "epoch": 0.5657281718699212,
+ "grad_norm": 25253.943359375,
+ "learning_rate": 5.572339060966439e-05,
+ "loss": 0.4239,
+ "step": 109650
+ },
+ {
+ "epoch": 0.565986141852534,
+ "grad_norm": 22364.595703125,
+ "learning_rate": 5.5687099052448675e-05,
+ "loss": 0.4255,
+ "step": 109700
+ },
+ {
+ "epoch": 0.5662441118351469,
+ "grad_norm": 23305.46484375,
+ "learning_rate": 5.565080445956961e-05,
+ "loss": 0.4254,
+ "step": 109750
+ },
+ {
+ "epoch": 0.5665020818177597,
+ "grad_norm": 20225.2421875,
+ "learning_rate": 5.561450685040054e-05,
+ "loss": 0.4239,
+ "step": 109800
+ },
+ {
+ "epoch": 0.5667600518003725,
+ "grad_norm": 20221.8203125,
+ "learning_rate": 5.557820624431645e-05,
+ "loss": 0.4171,
+ "step": 109850
+ },
+ {
+ "epoch": 0.5670180217829853,
+ "grad_norm": 19833.607421875,
+ "learning_rate": 5.554190266069387e-05,
+ "loss": 0.4224,
+ "step": 109900
+ },
+ {
+ "epoch": 0.5672759917655982,
+ "grad_norm": 19884.58203125,
+ "learning_rate": 5.550559611891095e-05,
+ "loss": 0.4196,
+ "step": 109950
+ },
+ {
+ "epoch": 0.567533961748211,
+ "grad_norm": 22072.25390625,
+ "learning_rate": 5.546928663834745e-05,
+ "loss": 0.4196,
+ "step": 110000
+ },
+ {
+ "epoch": 0.567533961748211,
+ "eval_loss": 0.4103853106498718,
+ "eval_runtime": 3606.5234,
+ "eval_samples_per_second": 859.864,
+ "eval_steps_per_second": 1.679,
+ "step": 110000
+ },
+ {
+ "epoch": 0.5677919317308238,
+ "grad_norm": 21647.181640625,
+ "learning_rate": 5.543297423838464e-05,
+ "loss": 0.414,
+ "step": 110050
+ },
+ {
+ "epoch": 0.5680499017134366,
+ "grad_norm": 23264.748046875,
+ "learning_rate": 5.5396658938405396e-05,
+ "loss": 0.4192,
+ "step": 110100
+ },
+ {
+ "epoch": 0.5683078716960495,
+ "grad_norm": 21868.10546875,
+ "learning_rate": 5.536034075779409e-05,
+ "loss": 0.4222,
+ "step": 110150
+ },
+ {
+ "epoch": 0.5685658416786623,
+ "grad_norm": 22489.07421875,
+ "learning_rate": 5.53240197159367e-05,
+ "loss": 0.4237,
+ "step": 110200
+ },
+ {
+ "epoch": 0.568823811661275,
+ "grad_norm": 21589.173828125,
+ "learning_rate": 5.5287695832220674e-05,
+ "loss": 0.4218,
+ "step": 110250
+ },
+ {
+ "epoch": 0.5690817816438879,
+ "grad_norm": 23184.103515625,
+ "learning_rate": 5.525136912603501e-05,
+ "loss": 0.4203,
+ "step": 110300
+ },
+ {
+ "epoch": 0.5693397516265007,
+ "grad_norm": 23085.970703125,
+ "learning_rate": 5.521503961677019e-05,
+ "loss": 0.4216,
+ "step": 110350
+ },
+ {
+ "epoch": 0.5695977216091136,
+ "grad_norm": 22217.3671875,
+ "learning_rate": 5.51787073238182e-05,
+ "loss": 0.424,
+ "step": 110400
+ },
+ {
+ "epoch": 0.5698556915917263,
+ "grad_norm": 23515.9375,
+ "learning_rate": 5.514237226657253e-05,
+ "loss": 0.4217,
+ "step": 110450
+ },
+ {
+ "epoch": 0.5701136615743392,
+ "grad_norm": 21375.2734375,
+ "learning_rate": 5.510603446442812e-05,
+ "loss": 0.4175,
+ "step": 110500
+ },
+ {
+ "epoch": 0.570371631556952,
+ "grad_norm": 21658.15625,
+ "learning_rate": 5.506969393678139e-05,
+ "loss": 0.4191,
+ "step": 110550
+ },
+ {
+ "epoch": 0.5706296015395649,
+ "grad_norm": 24653.294921875,
+ "learning_rate": 5.503335070303018e-05,
+ "loss": 0.419,
+ "step": 110600
+ },
+ {
+ "epoch": 0.5708875715221777,
+ "grad_norm": 21722.984375,
+ "learning_rate": 5.4997004782573855e-05,
+ "loss": 0.4237,
+ "step": 110650
+ },
+ {
+ "epoch": 0.5711455415047905,
+ "grad_norm": 20897.595703125,
+ "learning_rate": 5.496065619481312e-05,
+ "loss": 0.4211,
+ "step": 110700
+ },
+ {
+ "epoch": 0.5714035114874033,
+ "grad_norm": 20729.123046875,
+ "learning_rate": 5.4924304959150175e-05,
+ "loss": 0.4228,
+ "step": 110750
+ },
+ {
+ "epoch": 0.5716614814700162,
+ "grad_norm": 22107.888671875,
+ "learning_rate": 5.488795109498861e-05,
+ "loss": 0.4222,
+ "step": 110800
+ },
+ {
+ "epoch": 0.571919451452629,
+ "grad_norm": 24264.587890625,
+ "learning_rate": 5.485159462173337e-05,
+ "loss": 0.4232,
+ "step": 110850
+ },
+ {
+ "epoch": 0.5721774214352418,
+ "grad_norm": 23000.34375,
+ "learning_rate": 5.481523555879089e-05,
+ "loss": 0.4236,
+ "step": 110900
+ },
+ {
+ "epoch": 0.5724353914178546,
+ "grad_norm": 20345.26953125,
+ "learning_rate": 5.4778873925568905e-05,
+ "loss": 0.4254,
+ "step": 110950
+ },
+ {
+ "epoch": 0.5726933614004674,
+ "grad_norm": 25514.09765625,
+ "learning_rate": 5.4742509741476566e-05,
+ "loss": 0.4247,
+ "step": 111000
+ },
+ {
+ "epoch": 0.5729513313830803,
+ "grad_norm": 22510.115234375,
+ "learning_rate": 5.470614302592434e-05,
+ "loss": 0.4271,
+ "step": 111050
+ },
+ {
+ "epoch": 0.573209301365693,
+ "grad_norm": 24683.4921875,
+ "learning_rate": 5.466977379832411e-05,
+ "loss": 0.4207,
+ "step": 111100
+ },
+ {
+ "epoch": 0.5734672713483059,
+ "grad_norm": 22154.2890625,
+ "learning_rate": 5.4633402078089035e-05,
+ "loss": 0.422,
+ "step": 111150
+ },
+ {
+ "epoch": 0.5737252413309187,
+ "grad_norm": 23333.966796875,
+ "learning_rate": 5.459702788463367e-05,
+ "loss": 0.4218,
+ "step": 111200
+ },
+ {
+ "epoch": 0.5739832113135316,
+ "grad_norm": 26566.900390625,
+ "learning_rate": 5.4560651237373814e-05,
+ "loss": 0.4269,
+ "step": 111250
+ },
+ {
+ "epoch": 0.5742411812961444,
+ "grad_norm": 21463.828125,
+ "learning_rate": 5.452427215572666e-05,
+ "loss": 0.4196,
+ "step": 111300
+ },
+ {
+ "epoch": 0.5744991512787572,
+ "grad_norm": 24921.373046875,
+ "learning_rate": 5.448789065911064e-05,
+ "loss": 0.4248,
+ "step": 111350
+ },
+ {
+ "epoch": 0.57475712126137,
+ "grad_norm": 23610.16015625,
+ "learning_rate": 5.445150676694548e-05,
+ "loss": 0.4245,
+ "step": 111400
+ },
+ {
+ "epoch": 0.5750150912439829,
+ "grad_norm": 24598.2109375,
+ "learning_rate": 5.441512049865221e-05,
+ "loss": 0.4199,
+ "step": 111450
+ },
+ {
+ "epoch": 0.5752730612265957,
+ "grad_norm": 24330.02734375,
+ "learning_rate": 5.43787318736531e-05,
+ "loss": 0.423,
+ "step": 111500
+ },
+ {
+ "epoch": 0.5755310312092085,
+ "grad_norm": 23434.587890625,
+ "learning_rate": 5.434234091137171e-05,
+ "loss": 0.4214,
+ "step": 111550
+ },
+ {
+ "epoch": 0.5757890011918213,
+ "grad_norm": 25007.08203125,
+ "learning_rate": 5.430594763123283e-05,
+ "loss": 0.4258,
+ "step": 111600
+ },
+ {
+ "epoch": 0.5760469711744342,
+ "grad_norm": 24568.759765625,
+ "learning_rate": 5.4269552052662486e-05,
+ "loss": 0.4248,
+ "step": 111650
+ },
+ {
+ "epoch": 0.576304941157047,
+ "grad_norm": 22131.74609375,
+ "learning_rate": 5.423315419508792e-05,
+ "loss": 0.418,
+ "step": 111700
+ },
+ {
+ "epoch": 0.5765629111396597,
+ "grad_norm": 22058.443359375,
+ "learning_rate": 5.4196754077937626e-05,
+ "loss": 0.4289,
+ "step": 111750
+ },
+ {
+ "epoch": 0.5768208811222726,
+ "grad_norm": 23790.3203125,
+ "learning_rate": 5.4160351720641276e-05,
+ "loss": 0.4217,
+ "step": 111800
+ },
+ {
+ "epoch": 0.5770788511048854,
+ "grad_norm": 20349.287109375,
+ "learning_rate": 5.412394714262974e-05,
+ "loss": 0.4154,
+ "step": 111850
+ },
+ {
+ "epoch": 0.5773368210874983,
+ "grad_norm": 20262.9296875,
+ "learning_rate": 5.408754036333506e-05,
+ "loss": 0.4214,
+ "step": 111900
+ },
+ {
+ "epoch": 0.5775947910701111,
+ "grad_norm": 21678.17578125,
+ "learning_rate": 5.4051131402190494e-05,
+ "loss": 0.4168,
+ "step": 111950
+ },
+ {
+ "epoch": 0.5778527610527239,
+ "grad_norm": 22649.1875,
+ "learning_rate": 5.4014720278630415e-05,
+ "loss": 0.4206,
+ "step": 112000
+ },
+ {
+ "epoch": 0.5781107310353367,
+ "grad_norm": 22304.1328125,
+ "learning_rate": 5.39783070120904e-05,
+ "loss": 0.4199,
+ "step": 112050
+ },
+ {
+ "epoch": 0.5783687010179496,
+ "grad_norm": 22573.169921875,
+ "learning_rate": 5.394189162200715e-05,
+ "loss": 0.4325,
+ "step": 112100
+ },
+ {
+ "epoch": 0.5786266710005624,
+ "grad_norm": 22942.09765625,
+ "learning_rate": 5.390547412781847e-05,
+ "loss": 0.416,
+ "step": 112150
+ },
+ {
+ "epoch": 0.5788846409831752,
+ "grad_norm": 20210.18359375,
+ "learning_rate": 5.386905454896333e-05,
+ "loss": 0.4274,
+ "step": 112200
+ },
+ {
+ "epoch": 0.579142610965788,
+ "grad_norm": 22916.09375,
+ "learning_rate": 5.38326329048818e-05,
+ "loss": 0.4208,
+ "step": 112250
+ },
+ {
+ "epoch": 0.5794005809484009,
+ "grad_norm": 21563.78125,
+ "learning_rate": 5.379620921501503e-05,
+ "loss": 0.4264,
+ "step": 112300
+ },
+ {
+ "epoch": 0.5796585509310137,
+ "grad_norm": 20984.3671875,
+ "learning_rate": 5.375978349880528e-05,
+ "loss": 0.4229,
+ "step": 112350
+ },
+ {
+ "epoch": 0.5799165209136264,
+ "grad_norm": 22014.6640625,
+ "learning_rate": 5.372335577569592e-05,
+ "loss": 0.4205,
+ "step": 112400
+ },
+ {
+ "epoch": 0.5801744908962393,
+ "grad_norm": 22977.39453125,
+ "learning_rate": 5.3686926065131325e-05,
+ "loss": 0.4248,
+ "step": 112450
+ },
+ {
+ "epoch": 0.5804324608788521,
+ "grad_norm": 22589.521484375,
+ "learning_rate": 5.365049438655702e-05,
+ "loss": 0.4165,
+ "step": 112500
+ },
+ {
+ "epoch": 0.580690430861465,
+ "grad_norm": 24455.625,
+ "learning_rate": 5.3614060759419474e-05,
+ "loss": 0.4224,
+ "step": 112550
+ },
+ {
+ "epoch": 0.5809484008440777,
+ "grad_norm": 24485.833984375,
+ "learning_rate": 5.357762520316628e-05,
+ "loss": 0.4264,
+ "step": 112600
+ },
+ {
+ "epoch": 0.5812063708266906,
+ "grad_norm": 23294.244140625,
+ "learning_rate": 5.354118773724603e-05,
+ "loss": 0.4254,
+ "step": 112650
+ },
+ {
+ "epoch": 0.5814643408093034,
+ "grad_norm": 21813.884765625,
+ "learning_rate": 5.350474838110835e-05,
+ "loss": 0.4226,
+ "step": 112700
+ },
+ {
+ "epoch": 0.5817223107919163,
+ "grad_norm": 23532.0546875,
+ "learning_rate": 5.3468307154203836e-05,
+ "loss": 0.4236,
+ "step": 112750
+ },
+ {
+ "epoch": 0.5819802807745291,
+ "grad_norm": 24739.787109375,
+ "learning_rate": 5.343186407598413e-05,
+ "loss": 0.4276,
+ "step": 112800
+ },
+ {
+ "epoch": 0.5822382507571419,
+ "grad_norm": 23312.783203125,
+ "learning_rate": 5.339541916590184e-05,
+ "loss": 0.4281,
+ "step": 112850
+ },
+ {
+ "epoch": 0.5824962207397547,
+ "grad_norm": 24166.798828125,
+ "learning_rate": 5.335897244341054e-05,
+ "loss": 0.4238,
+ "step": 112900
+ },
+ {
+ "epoch": 0.5827541907223676,
+ "grad_norm": 23690.455078125,
+ "learning_rate": 5.332252392796478e-05,
+ "loss": 0.4181,
+ "step": 112950
+ },
+ {
+ "epoch": 0.5830121607049804,
+ "grad_norm": 23499.16015625,
+ "learning_rate": 5.32860736390201e-05,
+ "loss": 0.4143,
+ "step": 113000
+ },
+ {
+ "epoch": 0.5832701306875931,
+ "grad_norm": 23299.5625,
+ "learning_rate": 5.324962159603294e-05,
+ "loss": 0.4198,
+ "step": 113050
+ },
+ {
+ "epoch": 0.583528100670206,
+ "grad_norm": 22958.423828125,
+ "learning_rate": 5.321316781846071e-05,
+ "loss": 0.421,
+ "step": 113100
+ },
+ {
+ "epoch": 0.5837860706528188,
+ "grad_norm": 20775.119140625,
+ "learning_rate": 5.3176712325761704e-05,
+ "loss": 0.4148,
+ "step": 113150
+ },
+ {
+ "epoch": 0.5840440406354317,
+ "grad_norm": 23139.953125,
+ "learning_rate": 5.3140255137395155e-05,
+ "loss": 0.422,
+ "step": 113200
+ },
+ {
+ "epoch": 0.5843020106180444,
+ "grad_norm": 19829.94140625,
+ "learning_rate": 5.310379627282125e-05,
+ "loss": 0.4248,
+ "step": 113250
+ },
+ {
+ "epoch": 0.5845599806006573,
+ "grad_norm": 20085.572265625,
+ "learning_rate": 5.306733575150099e-05,
+ "loss": 0.4183,
+ "step": 113300
+ },
+ {
+ "epoch": 0.5848179505832701,
+ "grad_norm": 23206.27734375,
+ "learning_rate": 5.303087359289629e-05,
+ "loss": 0.423,
+ "step": 113350
+ },
+ {
+ "epoch": 0.585075920565883,
+ "grad_norm": 25039.34765625,
+ "learning_rate": 5.299440981646996e-05,
+ "loss": 0.4232,
+ "step": 113400
+ },
+ {
+ "epoch": 0.5853338905484958,
+ "grad_norm": 21276.865234375,
+ "learning_rate": 5.2957944441685646e-05,
+ "loss": 0.4205,
+ "step": 113450
+ },
+ {
+ "epoch": 0.5855918605311086,
+ "grad_norm": 22706.712890625,
+ "learning_rate": 5.292147748800788e-05,
+ "loss": 0.4225,
+ "step": 113500
+ },
+ {
+ "epoch": 0.5858498305137214,
+ "grad_norm": 18046.537109375,
+ "learning_rate": 5.2885008974902004e-05,
+ "loss": 0.4195,
+ "step": 113550
+ },
+ {
+ "epoch": 0.5861078004963343,
+ "grad_norm": 22363.5625,
+ "learning_rate": 5.28485389218342e-05,
+ "loss": 0.4149,
+ "step": 113600
+ },
+ {
+ "epoch": 0.5863657704789471,
+ "grad_norm": 24409.609375,
+ "learning_rate": 5.2812067348271466e-05,
+ "loss": 0.4224,
+ "step": 113650
+ },
+ {
+ "epoch": 0.5866237404615599,
+ "grad_norm": 23921.68359375,
+ "learning_rate": 5.277559427368164e-05,
+ "loss": 0.4274,
+ "step": 113700
+ },
+ {
+ "epoch": 0.5868817104441727,
+ "grad_norm": 23887.84765625,
+ "learning_rate": 5.273911971753335e-05,
+ "loss": 0.4185,
+ "step": 113750
+ },
+ {
+ "epoch": 0.5871396804267855,
+ "grad_norm": 23169.423828125,
+ "learning_rate": 5.270264369929597e-05,
+ "loss": 0.4218,
+ "step": 113800
+ },
+ {
+ "epoch": 0.5873976504093984,
+ "grad_norm": 23339.57421875,
+ "learning_rate": 5.266616623843972e-05,
+ "loss": 0.4211,
+ "step": 113850
+ },
+ {
+ "epoch": 0.5876556203920111,
+ "grad_norm": 22072.59765625,
+ "learning_rate": 5.2629687354435576e-05,
+ "loss": 0.4191,
+ "step": 113900
+ },
+ {
+ "epoch": 0.587913590374624,
+ "grad_norm": 24308.357421875,
+ "learning_rate": 5.259320706675523e-05,
+ "loss": 0.4168,
+ "step": 113950
+ },
+ {
+ "epoch": 0.5881715603572368,
+ "grad_norm": 20896.5703125,
+ "learning_rate": 5.255672539487119e-05,
+ "loss": 0.4201,
+ "step": 114000
+ },
+ {
+ "epoch": 0.5884295303398497,
+ "grad_norm": 20070.814453125,
+ "learning_rate": 5.252024235825661e-05,
+ "loss": 0.4216,
+ "step": 114050
+ },
+ {
+ "epoch": 0.5886875003224625,
+ "grad_norm": 24864.869140625,
+ "learning_rate": 5.2483757976385486e-05,
+ "loss": 0.4269,
+ "step": 114100
+ },
+ {
+ "epoch": 0.5889454703050753,
+ "grad_norm": 24734.0234375,
+ "learning_rate": 5.2447272268732436e-05,
+ "loss": 0.4196,
+ "step": 114150
+ },
+ {
+ "epoch": 0.5892034402876881,
+ "grad_norm": 22383.212890625,
+ "learning_rate": 5.2410785254772856e-05,
+ "loss": 0.4171,
+ "step": 114200
+ },
+ {
+ "epoch": 0.589461410270301,
+ "grad_norm": 22587.44921875,
+ "learning_rate": 5.237429695398276e-05,
+ "loss": 0.4232,
+ "step": 114250
+ },
+ {
+ "epoch": 0.5897193802529138,
+ "grad_norm": 23357.263671875,
+ "learning_rate": 5.2337807385838935e-05,
+ "loss": 0.4241,
+ "step": 114300
+ },
+ {
+ "epoch": 0.5899773502355266,
+ "grad_norm": 24632.125,
+ "learning_rate": 5.23013165698188e-05,
+ "loss": 0.4154,
+ "step": 114350
+ },
+ {
+ "epoch": 0.5902353202181394,
+ "grad_norm": 23191.818359375,
+ "learning_rate": 5.226482452540045e-05,
+ "loss": 0.424,
+ "step": 114400
+ },
+ {
+ "epoch": 0.5904932902007523,
+ "grad_norm": 23649.560546875,
+ "learning_rate": 5.2228331272062626e-05,
+ "loss": 0.427,
+ "step": 114450
+ },
+ {
+ "epoch": 0.5907512601833651,
+ "grad_norm": 23992.169921875,
+ "learning_rate": 5.21918368292847e-05,
+ "loss": 0.4267,
+ "step": 114500
+ },
+ {
+ "epoch": 0.5910092301659778,
+ "grad_norm": 21792.041015625,
+ "learning_rate": 5.215534121654673e-05,
+ "loss": 0.4272,
+ "step": 114550
+ },
+ {
+ "epoch": 0.5912672001485907,
+ "grad_norm": 25516.345703125,
+ "learning_rate": 5.211884445332935e-05,
+ "loss": 0.4207,
+ "step": 114600
+ },
+ {
+ "epoch": 0.5915251701312035,
+ "grad_norm": 22557.25390625,
+ "learning_rate": 5.208234655911384e-05,
+ "loss": 0.4228,
+ "step": 114650
+ },
+ {
+ "epoch": 0.5917831401138164,
+ "grad_norm": 24185.09765625,
+ "learning_rate": 5.2045847553382045e-05,
+ "loss": 0.4226,
+ "step": 114700
+ },
+ {
+ "epoch": 0.5920411100964291,
+ "grad_norm": 20565.134765625,
+ "learning_rate": 5.200934745561643e-05,
+ "loss": 0.4274,
+ "step": 114750
+ },
+ {
+ "epoch": 0.592299080079042,
+ "grad_norm": 24019.0,
+ "learning_rate": 5.197284628530007e-05,
+ "loss": 0.4234,
+ "step": 114800
+ },
+ {
+ "epoch": 0.5925570500616548,
+ "grad_norm": 26129.01171875,
+ "learning_rate": 5.193634406191658e-05,
+ "loss": 0.418,
+ "step": 114850
+ },
+ {
+ "epoch": 0.5928150200442677,
+ "grad_norm": 25187.611328125,
+ "learning_rate": 5.18998408049501e-05,
+ "loss": 0.4213,
+ "step": 114900
+ },
+ {
+ "epoch": 0.5930729900268805,
+ "grad_norm": 20145.14453125,
+ "learning_rate": 5.186333653388539e-05,
+ "loss": 0.418,
+ "step": 114950
+ },
+ {
+ "epoch": 0.5933309600094933,
+ "grad_norm": 22472.322265625,
+ "learning_rate": 5.182683126820773e-05,
+ "loss": 0.4209,
+ "step": 115000
+ },
+ {
+ "epoch": 0.5933309600094933,
+ "eval_loss": 0.4084908068180084,
+ "eval_runtime": 3582.6916,
+ "eval_samples_per_second": 865.584,
+ "eval_steps_per_second": 1.691,
+ "step": 115000
+ },
+ {
+ "epoch": 0.5935889299921061,
+ "grad_norm": 22404.052734375,
+ "learning_rate": 5.179032502740291e-05,
+ "loss": 0.4147,
+ "step": 115050
+ },
+ {
+ "epoch": 0.593846899974719,
+ "grad_norm": 21242.705078125,
+ "learning_rate": 5.175381783095725e-05,
+ "loss": 0.4237,
+ "step": 115100
+ },
+ {
+ "epoch": 0.5941048699573318,
+ "grad_norm": 22416.06640625,
+ "learning_rate": 5.171730969835758e-05,
+ "loss": 0.4185,
+ "step": 115150
+ },
+ {
+ "epoch": 0.5943628399399445,
+ "grad_norm": 22231.525390625,
+ "learning_rate": 5.1680800649091276e-05,
+ "loss": 0.4227,
+ "step": 115200
+ },
+ {
+ "epoch": 0.5946208099225574,
+ "grad_norm": 22431.462890625,
+ "learning_rate": 5.164429070264613e-05,
+ "loss": 0.4225,
+ "step": 115250
+ },
+ {
+ "epoch": 0.5948787799051702,
+ "grad_norm": 26008.57421875,
+ "learning_rate": 5.160777987851044e-05,
+ "loss": 0.4253,
+ "step": 115300
+ },
+ {
+ "epoch": 0.5951367498877831,
+ "grad_norm": 22555.501953125,
+ "learning_rate": 5.157126819617297e-05,
+ "loss": 0.4181,
+ "step": 115350
+ },
+ {
+ "epoch": 0.5953947198703958,
+ "grad_norm": 25113.587890625,
+ "learning_rate": 5.153475567512298e-05,
+ "loss": 0.4261,
+ "step": 115400
+ },
+ {
+ "epoch": 0.5956526898530087,
+ "grad_norm": 22877.908203125,
+ "learning_rate": 5.149824233485013e-05,
+ "loss": 0.4177,
+ "step": 115450
+ },
+ {
+ "epoch": 0.5959106598356215,
+ "grad_norm": 22468.34375,
+ "learning_rate": 5.1461728194844526e-05,
+ "loss": 0.4223,
+ "step": 115500
+ },
+ {
+ "epoch": 0.5961686298182344,
+ "grad_norm": 24525.326171875,
+ "learning_rate": 5.142521327459672e-05,
+ "loss": 0.4159,
+ "step": 115550
+ },
+ {
+ "epoch": 0.5964265998008472,
+ "grad_norm": 23334.296875,
+ "learning_rate": 5.1388697593597643e-05,
+ "loss": 0.4206,
+ "step": 115600
+ },
+ {
+ "epoch": 0.59668456978346,
+ "grad_norm": 21743.333984375,
+ "learning_rate": 5.1352181171338706e-05,
+ "loss": 0.4191,
+ "step": 115650
+ },
+ {
+ "epoch": 0.5969425397660728,
+ "grad_norm": 26287.66796875,
+ "learning_rate": 5.131566402731165e-05,
+ "loss": 0.4147,
+ "step": 115700
+ },
+ {
+ "epoch": 0.5972005097486857,
+ "grad_norm": 20856.890625,
+ "learning_rate": 5.12791461810086e-05,
+ "loss": 0.4248,
+ "step": 115750
+ },
+ {
+ "epoch": 0.5974584797312985,
+ "grad_norm": 22821.73046875,
+ "learning_rate": 5.124262765192208e-05,
+ "loss": 0.4239,
+ "step": 115800
+ },
+ {
+ "epoch": 0.5977164497139112,
+ "grad_norm": 24805.427734375,
+ "learning_rate": 5.1206108459545e-05,
+ "loss": 0.4172,
+ "step": 115850
+ },
+ {
+ "epoch": 0.5979744196965241,
+ "grad_norm": 25195.064453125,
+ "learning_rate": 5.116958862337057e-05,
+ "loss": 0.4242,
+ "step": 115900
+ },
+ {
+ "epoch": 0.5982323896791369,
+ "grad_norm": 22029.236328125,
+ "learning_rate": 5.1133068162892383e-05,
+ "loss": 0.4217,
+ "step": 115950
+ },
+ {
+ "epoch": 0.5984903596617498,
+ "grad_norm": 23296.77734375,
+ "learning_rate": 5.109654709760434e-05,
+ "loss": 0.4223,
+ "step": 116000
+ },
+ {
+ "epoch": 0.5987483296443625,
+ "grad_norm": 23822.447265625,
+ "learning_rate": 5.106002544700069e-05,
+ "loss": 0.4235,
+ "step": 116050
+ },
+ {
+ "epoch": 0.5990062996269754,
+ "grad_norm": 21188.46875,
+ "learning_rate": 5.1023503230576e-05,
+ "loss": 0.4275,
+ "step": 116100
+ },
+ {
+ "epoch": 0.5992642696095882,
+ "grad_norm": 24459.021484375,
+ "learning_rate": 5.0986980467825096e-05,
+ "loss": 0.4217,
+ "step": 116150
+ },
+ {
+ "epoch": 0.5995222395922011,
+ "grad_norm": 22304.396484375,
+ "learning_rate": 5.095045717824313e-05,
+ "loss": 0.42,
+ "step": 116200
+ },
+ {
+ "epoch": 0.5997802095748139,
+ "grad_norm": 20124.943359375,
+ "learning_rate": 5.0913933381325516e-05,
+ "loss": 0.4149,
+ "step": 116250
+ },
+ {
+ "epoch": 0.6000381795574267,
+ "grad_norm": 22610.3046875,
+ "learning_rate": 5.087740909656798e-05,
+ "loss": 0.4164,
+ "step": 116300
+ },
+ {
+ "epoch": 0.6002961495400395,
+ "grad_norm": 22058.974609375,
+ "learning_rate": 5.084088434346645e-05,
+ "loss": 0.4211,
+ "step": 116350
+ },
+ {
+ "epoch": 0.6005541195226524,
+ "grad_norm": 23463.626953125,
+ "learning_rate": 5.0804359141517134e-05,
+ "loss": 0.4182,
+ "step": 116400
+ },
+ {
+ "epoch": 0.6008120895052652,
+ "grad_norm": 25045.67578125,
+ "learning_rate": 5.076783351021648e-05,
+ "loss": 0.4202,
+ "step": 116450
+ },
+ {
+ "epoch": 0.601070059487878,
+ "grad_norm": 22583.076171875,
+ "learning_rate": 5.0731307469061184e-05,
+ "loss": 0.4177,
+ "step": 116500
+ },
+ {
+ "epoch": 0.6013280294704908,
+ "grad_norm": 26350.400390625,
+ "learning_rate": 5.069478103754811e-05,
+ "loss": 0.4193,
+ "step": 116550
+ },
+ {
+ "epoch": 0.6015859994531036,
+ "grad_norm": 21430.255859375,
+ "learning_rate": 5.0658254235174385e-05,
+ "loss": 0.422,
+ "step": 116600
+ },
+ {
+ "epoch": 0.6018439694357165,
+ "grad_norm": 21467.482421875,
+ "learning_rate": 5.0621727081437275e-05,
+ "loss": 0.4157,
+ "step": 116650
+ },
+ {
+ "epoch": 0.6021019394183292,
+ "grad_norm": 25780.095703125,
+ "learning_rate": 5.05851995958343e-05,
+ "loss": 0.4243,
+ "step": 116700
+ },
+ {
+ "epoch": 0.6023599094009421,
+ "grad_norm": 21074.52734375,
+ "learning_rate": 5.0548671797863125e-05,
+ "loss": 0.4271,
+ "step": 116750
+ },
+ {
+ "epoch": 0.6026178793835549,
+ "grad_norm": 25752.841796875,
+ "learning_rate": 5.051214370702155e-05,
+ "loss": 0.4209,
+ "step": 116800
+ },
+ {
+ "epoch": 0.6028758493661678,
+ "grad_norm": 23178.93359375,
+ "learning_rate": 5.047561534280758e-05,
+ "loss": 0.4232,
+ "step": 116850
+ },
+ {
+ "epoch": 0.6031338193487806,
+ "grad_norm": 23263.65625,
+ "learning_rate": 5.0439086724719355e-05,
+ "loss": 0.4196,
+ "step": 116900
+ },
+ {
+ "epoch": 0.6033917893313934,
+ "grad_norm": 20372.861328125,
+ "learning_rate": 5.040255787225514e-05,
+ "loss": 0.4194,
+ "step": 116950
+ },
+ {
+ "epoch": 0.6036497593140062,
+ "grad_norm": 23453.59375,
+ "learning_rate": 5.036602880491332e-05,
+ "loss": 0.4156,
+ "step": 117000
+ },
+ {
+ "epoch": 0.6039077292966191,
+ "grad_norm": 24039.392578125,
+ "learning_rate": 5.032949954219243e-05,
+ "loss": 0.4192,
+ "step": 117050
+ },
+ {
+ "epoch": 0.6041656992792319,
+ "grad_norm": 24246.55859375,
+ "learning_rate": 5.0292970103591044e-05,
+ "loss": 0.426,
+ "step": 117100
+ },
+ {
+ "epoch": 0.6044236692618447,
+ "grad_norm": 23403.130859375,
+ "learning_rate": 5.0256440508607904e-05,
+ "loss": 0.4195,
+ "step": 117150
+ },
+ {
+ "epoch": 0.6046816392444575,
+ "grad_norm": 21872.07421875,
+ "learning_rate": 5.021991077674179e-05,
+ "loss": 0.4214,
+ "step": 117200
+ },
+ {
+ "epoch": 0.6049396092270704,
+ "grad_norm": 22344.455078125,
+ "learning_rate": 5.018338092749155e-05,
+ "loss": 0.4205,
+ "step": 117250
+ },
+ {
+ "epoch": 0.6051975792096832,
+ "grad_norm": 22999.099609375,
+ "learning_rate": 5.014685098035612e-05,
+ "loss": 0.4203,
+ "step": 117300
+ },
+ {
+ "epoch": 0.6054555491922959,
+ "grad_norm": 21572.994140625,
+ "learning_rate": 5.011032095483448e-05,
+ "loss": 0.4279,
+ "step": 117350
+ },
+ {
+ "epoch": 0.6057135191749088,
+ "grad_norm": 21263.11328125,
+ "learning_rate": 5.007379087042566e-05,
+ "loss": 0.418,
+ "step": 117400
+ },
+ {
+ "epoch": 0.6059714891575216,
+ "grad_norm": 22789.671875,
+ "learning_rate": 5.00372607466287e-05,
+ "loss": 0.4196,
+ "step": 117450
+ },
+ {
+ "epoch": 0.6062294591401345,
+ "grad_norm": 21276.09765625,
+ "learning_rate": 5.000073060294267e-05,
+ "loss": 0.4125,
+ "step": 117500
+ },
+ {
+ "epoch": 0.6064874291227472,
+ "grad_norm": 22501.169921875,
+ "learning_rate": 4.9964200458866654e-05,
+ "loss": 0.4152,
+ "step": 117550
+ },
+ {
+ "epoch": 0.6067453991053601,
+ "grad_norm": 21645.912109375,
+ "learning_rate": 4.992767033389976e-05,
+ "loss": 0.4253,
+ "step": 117600
+ },
+ {
+ "epoch": 0.6070033690879729,
+ "grad_norm": 21256.7109375,
+ "learning_rate": 4.9891140247541025e-05,
+ "loss": 0.4214,
+ "step": 117650
+ },
+ {
+ "epoch": 0.6072613390705858,
+ "grad_norm": 22883.98046875,
+ "learning_rate": 4.985461021928952e-05,
+ "loss": 0.4238,
+ "step": 117700
+ },
+ {
+ "epoch": 0.6075193090531986,
+ "grad_norm": 21366.412109375,
+ "learning_rate": 4.981808026864426e-05,
+ "loss": 0.4225,
+ "step": 117750
+ },
+ {
+ "epoch": 0.6077772790358114,
+ "grad_norm": 24185.3515625,
+ "learning_rate": 4.978155041510425e-05,
+ "loss": 0.4196,
+ "step": 117800
+ },
+ {
+ "epoch": 0.6080352490184242,
+ "grad_norm": 21638.009765625,
+ "learning_rate": 4.974502067816838e-05,
+ "loss": 0.4221,
+ "step": 117850
+ },
+ {
+ "epoch": 0.608293219001037,
+ "grad_norm": 20867.111328125,
+ "learning_rate": 4.970849107733554e-05,
+ "loss": 0.4225,
+ "step": 117900
+ },
+ {
+ "epoch": 0.6085511889836499,
+ "grad_norm": 21785.69140625,
+ "learning_rate": 4.967196163210451e-05,
+ "loss": 0.4166,
+ "step": 117950
+ },
+ {
+ "epoch": 0.6088091589662626,
+ "grad_norm": 24691.8515625,
+ "learning_rate": 4.963543236197401e-05,
+ "loss": 0.4226,
+ "step": 118000
+ },
+ {
+ "epoch": 0.6090671289488755,
+ "grad_norm": 21214.1484375,
+ "learning_rate": 4.9598903286442675e-05,
+ "loss": 0.418,
+ "step": 118050
+ },
+ {
+ "epoch": 0.6093250989314883,
+ "grad_norm": 22802.849609375,
+ "learning_rate": 4.956237442500898e-05,
+ "loss": 0.4227,
+ "step": 118100
+ },
+ {
+ "epoch": 0.6095830689141012,
+ "grad_norm": 25204.90625,
+ "learning_rate": 4.952584579717135e-05,
+ "loss": 0.4152,
+ "step": 118150
+ },
+ {
+ "epoch": 0.6098410388967139,
+ "grad_norm": 21970.19140625,
+ "learning_rate": 4.9489317422428044e-05,
+ "loss": 0.4197,
+ "step": 118200
+ },
+ {
+ "epoch": 0.6100990088793268,
+ "grad_norm": 22331.013671875,
+ "learning_rate": 4.945278932027723e-05,
+ "loss": 0.4161,
+ "step": 118250
+ },
+ {
+ "epoch": 0.6103569788619396,
+ "grad_norm": 27234.177734375,
+ "learning_rate": 4.941626151021686e-05,
+ "loss": 0.4204,
+ "step": 118300
+ },
+ {
+ "epoch": 0.6106149488445525,
+ "grad_norm": 22253.0078125,
+ "learning_rate": 4.937973401174481e-05,
+ "loss": 0.4202,
+ "step": 118350
+ },
+ {
+ "epoch": 0.6108729188271653,
+ "grad_norm": 20930.27734375,
+ "learning_rate": 4.934320684435871e-05,
+ "loss": 0.4169,
+ "step": 118400
+ },
+ {
+ "epoch": 0.6111308888097781,
+ "grad_norm": 22569.205078125,
+ "learning_rate": 4.930668002755609e-05,
+ "loss": 0.4177,
+ "step": 118450
+ },
+ {
+ "epoch": 0.6113888587923909,
+ "grad_norm": 23197.943359375,
+ "learning_rate": 4.9270153580834256e-05,
+ "loss": 0.414,
+ "step": 118500
+ },
+ {
+ "epoch": 0.6116468287750038,
+ "grad_norm": 21927.1875,
+ "learning_rate": 4.923362752369029e-05,
+ "loss": 0.4203,
+ "step": 118550
+ },
+ {
+ "epoch": 0.6119047987576166,
+ "grad_norm": 23422.181640625,
+ "learning_rate": 4.919710187562112e-05,
+ "loss": 0.4213,
+ "step": 118600
+ },
+ {
+ "epoch": 0.6121627687402293,
+ "grad_norm": 23351.67578125,
+ "learning_rate": 4.9160576656123416e-05,
+ "loss": 0.4213,
+ "step": 118650
+ },
+ {
+ "epoch": 0.6124207387228422,
+ "grad_norm": 21228.416015625,
+ "learning_rate": 4.9124051884693664e-05,
+ "loss": 0.4192,
+ "step": 118700
+ },
+ {
+ "epoch": 0.612678708705455,
+ "grad_norm": 22555.9609375,
+ "learning_rate": 4.908752758082802e-05,
+ "loss": 0.4189,
+ "step": 118750
+ },
+ {
+ "epoch": 0.6129366786880679,
+ "grad_norm": 21010.859375,
+ "learning_rate": 4.905100376402251e-05,
+ "loss": 0.4194,
+ "step": 118800
+ },
+ {
+ "epoch": 0.6131946486706806,
+ "grad_norm": 23468.78515625,
+ "learning_rate": 4.901448045377279e-05,
+ "loss": 0.4151,
+ "step": 118850
+ },
+ {
+ "epoch": 0.6134526186532935,
+ "grad_norm": 23818.189453125,
+ "learning_rate": 4.8977957669574334e-05,
+ "loss": 0.4184,
+ "step": 118900
+ },
+ {
+ "epoch": 0.6137105886359063,
+ "grad_norm": 22162.76171875,
+ "learning_rate": 4.8941435430922294e-05,
+ "loss": 0.4181,
+ "step": 118950
+ },
+ {
+ "epoch": 0.6139685586185192,
+ "grad_norm": 22983.45703125,
+ "learning_rate": 4.8904913757311506e-05,
+ "loss": 0.4196,
+ "step": 119000
+ },
+ {
+ "epoch": 0.614226528601132,
+ "grad_norm": 22748.150390625,
+ "learning_rate": 4.886839266823656e-05,
+ "loss": 0.4195,
+ "step": 119050
+ },
+ {
+ "epoch": 0.6144844985837448,
+ "grad_norm": 23146.306640625,
+ "learning_rate": 4.8831872183191684e-05,
+ "loss": 0.4219,
+ "step": 119100
+ },
+ {
+ "epoch": 0.6147424685663576,
+ "grad_norm": 24951.591796875,
+ "learning_rate": 4.879535232167084e-05,
+ "loss": 0.4165,
+ "step": 119150
+ },
+ {
+ "epoch": 0.6150004385489705,
+ "grad_norm": 24381.689453125,
+ "learning_rate": 4.875883310316758e-05,
+ "loss": 0.4179,
+ "step": 119200
+ },
+ {
+ "epoch": 0.6152584085315833,
+ "grad_norm": 21191.4609375,
+ "learning_rate": 4.872231454717518e-05,
+ "loss": 0.4155,
+ "step": 119250
+ },
+ {
+ "epoch": 0.615516378514196,
+ "grad_norm": 21586.84375,
+ "learning_rate": 4.8685796673186526e-05,
+ "loss": 0.412,
+ "step": 119300
+ },
+ {
+ "epoch": 0.6157743484968089,
+ "grad_norm": 20381.505859375,
+ "learning_rate": 4.864927950069416e-05,
+ "loss": 0.4171,
+ "step": 119350
+ },
+ {
+ "epoch": 0.6160323184794217,
+ "grad_norm": 23258.296875,
+ "learning_rate": 4.861276304919026e-05,
+ "loss": 0.418,
+ "step": 119400
+ },
+ {
+ "epoch": 0.6162902884620346,
+ "grad_norm": 23629.14453125,
+ "learning_rate": 4.857624733816657e-05,
+ "loss": 0.4221,
+ "step": 119450
+ },
+ {
+ "epoch": 0.6165482584446473,
+ "grad_norm": 22892.7734375,
+ "learning_rate": 4.853973238711449e-05,
+ "loss": 0.4278,
+ "step": 119500
+ },
+ {
+ "epoch": 0.6168062284272602,
+ "grad_norm": 21639.669921875,
+ "learning_rate": 4.850321821552497e-05,
+ "loss": 0.4224,
+ "step": 119550
+ },
+ {
+ "epoch": 0.617064198409873,
+ "grad_norm": 21392.951171875,
+ "learning_rate": 4.84667048428886e-05,
+ "loss": 0.4192,
+ "step": 119600
+ },
+ {
+ "epoch": 0.6173221683924859,
+ "grad_norm": 22603.51953125,
+ "learning_rate": 4.843019228869548e-05,
+ "loss": 0.4169,
+ "step": 119650
+ },
+ {
+ "epoch": 0.6175801383750986,
+ "grad_norm": 22470.62109375,
+ "learning_rate": 4.8393680572435324e-05,
+ "loss": 0.4175,
+ "step": 119700
+ },
+ {
+ "epoch": 0.6178381083577115,
+ "grad_norm": 26185.634765625,
+ "learning_rate": 4.835716971359737e-05,
+ "loss": 0.4208,
+ "step": 119750
+ },
+ {
+ "epoch": 0.6180960783403243,
+ "grad_norm": 21508.12109375,
+ "learning_rate": 4.832065973167041e-05,
+ "loss": 0.4194,
+ "step": 119800
+ },
+ {
+ "epoch": 0.6183540483229372,
+ "grad_norm": 20717.205078125,
+ "learning_rate": 4.8284150646142784e-05,
+ "loss": 0.424,
+ "step": 119850
+ },
+ {
+ "epoch": 0.61861201830555,
+ "grad_norm": 20015.30078125,
+ "learning_rate": 4.8247642476502284e-05,
+ "loss": 0.4189,
+ "step": 119900
+ },
+ {
+ "epoch": 0.6188699882881628,
+ "grad_norm": 21596.349609375,
+ "learning_rate": 4.821113524223634e-05,
+ "loss": 0.4218,
+ "step": 119950
+ },
+ {
+ "epoch": 0.6191279582707756,
+ "grad_norm": 22051.921875,
+ "learning_rate": 4.817462896283173e-05,
+ "loss": 0.4184,
+ "step": 120000
+ },
+ {
+ "epoch": 0.6191279582707756,
+ "eval_loss": 0.40621376037597656,
+ "eval_runtime": 3588.5932,
+ "eval_samples_per_second": 864.16,
+ "eval_steps_per_second": 1.688,
+ "step": 120000
+ },
+ {
+ "epoch": 0.6193859282533885,
+ "grad_norm": 22562.478515625,
+ "learning_rate": 4.813812365777486e-05,
+ "loss": 0.4171,
+ "step": 120050
+ },
+ {
+ "epoch": 0.6196438982360013,
+ "grad_norm": 22531.505859375,
+ "learning_rate": 4.81016193465515e-05,
+ "loss": 0.4171,
+ "step": 120100
+ },
+ {
+ "epoch": 0.619901868218614,
+ "grad_norm": 21869.177734375,
+ "learning_rate": 4.8065116048647005e-05,
+ "loss": 0.4184,
+ "step": 120150
+ },
+ {
+ "epoch": 0.6201598382012269,
+ "grad_norm": 23087.56640625,
+ "learning_rate": 4.802861378354607e-05,
+ "loss": 0.4177,
+ "step": 120200
+ },
+ {
+ "epoch": 0.6204178081838397,
+ "grad_norm": 22546.060546875,
+ "learning_rate": 4.7992112570732925e-05,
+ "loss": 0.4213,
+ "step": 120250
+ },
+ {
+ "epoch": 0.6206757781664526,
+ "grad_norm": 22802.8984375,
+ "learning_rate": 4.795561242969122e-05,
+ "loss": 0.4218,
+ "step": 120300
+ },
+ {
+ "epoch": 0.6209337481490653,
+ "grad_norm": 19467.32421875,
+ "learning_rate": 4.791911337990401e-05,
+ "loss": 0.4141,
+ "step": 120350
+ },
+ {
+ "epoch": 0.6211917181316782,
+ "grad_norm": 25076.169921875,
+ "learning_rate": 4.78826154408538e-05,
+ "loss": 0.4178,
+ "step": 120400
+ },
+ {
+ "epoch": 0.621449688114291,
+ "grad_norm": 20815.935546875,
+ "learning_rate": 4.784611863202244e-05,
+ "loss": 0.4217,
+ "step": 120450
+ },
+ {
+ "epoch": 0.6217076580969039,
+ "grad_norm": 21686.271484375,
+ "learning_rate": 4.780962297289126e-05,
+ "loss": 0.4124,
+ "step": 120500
+ },
+ {
+ "epoch": 0.6219656280795167,
+ "grad_norm": 22759.310546875,
+ "learning_rate": 4.777312848294092e-05,
+ "loss": 0.4159,
+ "step": 120550
+ },
+ {
+ "epoch": 0.6222235980621295,
+ "grad_norm": 25325.75390625,
+ "learning_rate": 4.773663518165148e-05,
+ "loss": 0.4176,
+ "step": 120600
+ },
+ {
+ "epoch": 0.6224815680447423,
+ "grad_norm": 23474.958984375,
+ "learning_rate": 4.7700143088502344e-05,
+ "loss": 0.4143,
+ "step": 120650
+ },
+ {
+ "epoch": 0.6227395380273552,
+ "grad_norm": 25355.40625,
+ "learning_rate": 4.766365222297229e-05,
+ "loss": 0.4262,
+ "step": 120700
+ },
+ {
+ "epoch": 0.622997508009968,
+ "grad_norm": 22215.14453125,
+ "learning_rate": 4.762716260453945e-05,
+ "loss": 0.4149,
+ "step": 120750
+ },
+ {
+ "epoch": 0.6232554779925807,
+ "grad_norm": 23521.607421875,
+ "learning_rate": 4.759067425268126e-05,
+ "loss": 0.4223,
+ "step": 120800
+ },
+ {
+ "epoch": 0.6235134479751936,
+ "grad_norm": 24524.02734375,
+ "learning_rate": 4.7554187186874513e-05,
+ "loss": 0.4256,
+ "step": 120850
+ },
+ {
+ "epoch": 0.6237714179578064,
+ "grad_norm": 19954.322265625,
+ "learning_rate": 4.7517701426595266e-05,
+ "loss": 0.4119,
+ "step": 120900
+ },
+ {
+ "epoch": 0.6240293879404193,
+ "grad_norm": 21612.1953125,
+ "learning_rate": 4.748121699131893e-05,
+ "loss": 0.4196,
+ "step": 120950
+ },
+ {
+ "epoch": 0.624287357923032,
+ "grad_norm": 20466.0,
+ "learning_rate": 4.744473390052019e-05,
+ "loss": 0.4181,
+ "step": 121000
+ },
+ {
+ "epoch": 0.6245453279056449,
+ "grad_norm": 19992.173828125,
+ "learning_rate": 4.740825217367304e-05,
+ "loss": 0.4159,
+ "step": 121050
+ },
+ {
+ "epoch": 0.6248032978882577,
+ "grad_norm": 21553.1328125,
+ "learning_rate": 4.737177183025067e-05,
+ "loss": 0.4157,
+ "step": 121100
+ },
+ {
+ "epoch": 0.6250612678708706,
+ "grad_norm": 22242.568359375,
+ "learning_rate": 4.73352928897256e-05,
+ "loss": 0.4153,
+ "step": 121150
+ },
+ {
+ "epoch": 0.6253192378534834,
+ "grad_norm": 23883.212890625,
+ "learning_rate": 4.7298815371569606e-05,
+ "loss": 0.4173,
+ "step": 121200
+ },
+ {
+ "epoch": 0.6255772078360962,
+ "grad_norm": 22386.505859375,
+ "learning_rate": 4.7262339295253645e-05,
+ "loss": 0.4178,
+ "step": 121250
+ },
+ {
+ "epoch": 0.625835177818709,
+ "grad_norm": 22051.859375,
+ "learning_rate": 4.722586468024797e-05,
+ "loss": 0.4111,
+ "step": 121300
+ },
+ {
+ "epoch": 0.6260931478013219,
+ "grad_norm": 21374.4765625,
+ "learning_rate": 4.7189391546021996e-05,
+ "loss": 0.418,
+ "step": 121350
+ },
+ {
+ "epoch": 0.6263511177839347,
+ "grad_norm": 22240.453125,
+ "learning_rate": 4.7152919912044406e-05,
+ "loss": 0.4196,
+ "step": 121400
+ },
+ {
+ "epoch": 0.6266090877665474,
+ "grad_norm": 26278.798828125,
+ "learning_rate": 4.711644979778302e-05,
+ "loss": 0.4165,
+ "step": 121450
+ },
+ {
+ "epoch": 0.6268670577491603,
+ "grad_norm": 22151.77734375,
+ "learning_rate": 4.707998122270492e-05,
+ "loss": 0.422,
+ "step": 121500
+ },
+ {
+ "epoch": 0.6271250277317731,
+ "grad_norm": 21278.99609375,
+ "learning_rate": 4.7043514206276276e-05,
+ "loss": 0.4202,
+ "step": 121550
+ },
+ {
+ "epoch": 0.627382997714386,
+ "grad_norm": 24062.6015625,
+ "learning_rate": 4.70070487679625e-05,
+ "loss": 0.4174,
+ "step": 121600
+ },
+ {
+ "epoch": 0.6276409676969987,
+ "grad_norm": 21124.400390625,
+ "learning_rate": 4.697058492722815e-05,
+ "loss": 0.4156,
+ "step": 121650
+ },
+ {
+ "epoch": 0.6278989376796116,
+ "grad_norm": 22513.48046875,
+ "learning_rate": 4.6934122703536894e-05,
+ "loss": 0.4198,
+ "step": 121700
+ },
+ {
+ "epoch": 0.6281569076622244,
+ "grad_norm": 24250.720703125,
+ "learning_rate": 4.689766211635159e-05,
+ "loss": 0.4197,
+ "step": 121750
+ },
+ {
+ "epoch": 0.6284148776448373,
+ "grad_norm": 23831.220703125,
+ "learning_rate": 4.686120318513415e-05,
+ "loss": 0.415,
+ "step": 121800
+ },
+ {
+ "epoch": 0.62867284762745,
+ "grad_norm": 24005.458984375,
+ "learning_rate": 4.682474592934569e-05,
+ "loss": 0.4154,
+ "step": 121850
+ },
+ {
+ "epoch": 0.6289308176100629,
+ "grad_norm": 21365.09375,
+ "learning_rate": 4.6788290368446355e-05,
+ "loss": 0.4164,
+ "step": 121900
+ },
+ {
+ "epoch": 0.6291887875926757,
+ "grad_norm": 23601.689453125,
+ "learning_rate": 4.675183652189545e-05,
+ "loss": 0.418,
+ "step": 121950
+ },
+ {
+ "epoch": 0.6294467575752886,
+ "grad_norm": 21023.33203125,
+ "learning_rate": 4.671538440915129e-05,
+ "loss": 0.4181,
+ "step": 122000
+ },
+ {
+ "epoch": 0.6297047275579014,
+ "grad_norm": 22292.671875,
+ "learning_rate": 4.667893404967133e-05,
+ "loss": 0.4203,
+ "step": 122050
+ },
+ {
+ "epoch": 0.6299626975405141,
+ "grad_norm": 21975.3671875,
+ "learning_rate": 4.664248546291207e-05,
+ "loss": 0.4162,
+ "step": 122100
+ },
+ {
+ "epoch": 0.630220667523127,
+ "grad_norm": 22591.34765625,
+ "learning_rate": 4.660603866832906e-05,
+ "loss": 0.4146,
+ "step": 122150
+ },
+ {
+ "epoch": 0.6304786375057398,
+ "grad_norm": 23449.529296875,
+ "learning_rate": 4.6569593685376895e-05,
+ "loss": 0.4205,
+ "step": 122200
+ },
+ {
+ "epoch": 0.6307366074883527,
+ "grad_norm": 21614.046875,
+ "learning_rate": 4.653315053350918e-05,
+ "loss": 0.4173,
+ "step": 122250
+ },
+ {
+ "epoch": 0.6309945774709654,
+ "grad_norm": 26004.5859375,
+ "learning_rate": 4.649670923217859e-05,
+ "loss": 0.4137,
+ "step": 122300
+ },
+ {
+ "epoch": 0.6312525474535783,
+ "grad_norm": 23640.357421875,
+ "learning_rate": 4.646026980083676e-05,
+ "loss": 0.4165,
+ "step": 122350
+ },
+ {
+ "epoch": 0.6315105174361911,
+ "grad_norm": 23575.3984375,
+ "learning_rate": 4.6423832258934396e-05,
+ "loss": 0.4179,
+ "step": 122400
+ },
+ {
+ "epoch": 0.631768487418804,
+ "grad_norm": 26795.05078125,
+ "learning_rate": 4.63873966259211e-05,
+ "loss": 0.4171,
+ "step": 122450
+ },
+ {
+ "epoch": 0.6320264574014167,
+ "grad_norm": 22246.931640625,
+ "learning_rate": 4.6350962921245515e-05,
+ "loss": 0.4188,
+ "step": 122500
+ },
+ {
+ "epoch": 0.6322844273840296,
+ "grad_norm": 22268.3359375,
+ "learning_rate": 4.63145311643553e-05,
+ "loss": 0.4141,
+ "step": 122550
+ },
+ {
+ "epoch": 0.6325423973666424,
+ "grad_norm": 23749.38671875,
+ "learning_rate": 4.627810137469696e-05,
+ "loss": 0.4132,
+ "step": 122600
+ },
+ {
+ "epoch": 0.6328003673492553,
+ "grad_norm": 22449.15625,
+ "learning_rate": 4.624167357171606e-05,
+ "loss": 0.4177,
+ "step": 122650
+ },
+ {
+ "epoch": 0.6330583373318681,
+ "grad_norm": 22132.927734375,
+ "learning_rate": 4.6205247774857e-05,
+ "loss": 0.4211,
+ "step": 122700
+ },
+ {
+ "epoch": 0.6333163073144809,
+ "grad_norm": 20199.654296875,
+ "learning_rate": 4.616882400356323e-05,
+ "loss": 0.4127,
+ "step": 122750
+ },
+ {
+ "epoch": 0.6335742772970937,
+ "grad_norm": 23172.29296875,
+ "learning_rate": 4.613240227727699e-05,
+ "loss": 0.4173,
+ "step": 122800
+ },
+ {
+ "epoch": 0.6338322472797066,
+ "grad_norm": 23373.6640625,
+ "learning_rate": 4.609598261543954e-05,
+ "loss": 0.4139,
+ "step": 122850
+ },
+ {
+ "epoch": 0.6340902172623194,
+ "grad_norm": 22187.794921875,
+ "learning_rate": 4.6059565037490965e-05,
+ "loss": 0.4233,
+ "step": 122900
+ },
+ {
+ "epoch": 0.6343481872449321,
+ "grad_norm": 21762.28515625,
+ "learning_rate": 4.602314956287027e-05,
+ "loss": 0.4195,
+ "step": 122950
+ },
+ {
+ "epoch": 0.634606157227545,
+ "grad_norm": 24228.3125,
+ "learning_rate": 4.598673621101535e-05,
+ "loss": 0.4218,
+ "step": 123000
+ },
+ {
+ "epoch": 0.6348641272101578,
+ "grad_norm": 20360.208984375,
+ "learning_rate": 4.595032500136291e-05,
+ "loss": 0.4266,
+ "step": 123050
+ },
+ {
+ "epoch": 0.6351220971927707,
+ "grad_norm": 22763.712890625,
+ "learning_rate": 4.5913915953348574e-05,
+ "loss": 0.4153,
+ "step": 123100
+ },
+ {
+ "epoch": 0.6353800671753834,
+ "grad_norm": 25601.05078125,
+ "learning_rate": 4.5877509086406766e-05,
+ "loss": 0.4201,
+ "step": 123150
+ },
+ {
+ "epoch": 0.6356380371579963,
+ "grad_norm": 22695.91015625,
+ "learning_rate": 4.584110441997081e-05,
+ "loss": 0.4174,
+ "step": 123200
+ },
+ {
+ "epoch": 0.6358960071406091,
+ "grad_norm": 24915.857421875,
+ "learning_rate": 4.5804701973472755e-05,
+ "loss": 0.416,
+ "step": 123250
+ },
+ {
+ "epoch": 0.636153977123222,
+ "grad_norm": 24066.427734375,
+ "learning_rate": 4.576830176634356e-05,
+ "loss": 0.415,
+ "step": 123300
+ },
+ {
+ "epoch": 0.6364119471058348,
+ "grad_norm": 25726.71484375,
+ "learning_rate": 4.573190381801293e-05,
+ "loss": 0.4204,
+ "step": 123350
+ },
+ {
+ "epoch": 0.6366699170884476,
+ "grad_norm": 24271.998046875,
+ "learning_rate": 4.56955081479094e-05,
+ "loss": 0.4166,
+ "step": 123400
+ },
+ {
+ "epoch": 0.6369278870710604,
+ "grad_norm": 20897.818359375,
+ "learning_rate": 4.5659114775460286e-05,
+ "loss": 0.4156,
+ "step": 123450
+ },
+ {
+ "epoch": 0.6371858570536733,
+ "grad_norm": 24409.841796875,
+ "learning_rate": 4.562272372009163e-05,
+ "loss": 0.4208,
+ "step": 123500
+ },
+ {
+ "epoch": 0.6374438270362861,
+ "grad_norm": 24757.927734375,
+ "learning_rate": 4.5586335001228296e-05,
+ "loss": 0.4167,
+ "step": 123550
+ },
+ {
+ "epoch": 0.6377017970188988,
+ "grad_norm": 22433.091796875,
+ "learning_rate": 4.554994863829387e-05,
+ "loss": 0.4206,
+ "step": 123600
+ },
+ {
+ "epoch": 0.6379597670015117,
+ "grad_norm": 22757.798828125,
+ "learning_rate": 4.5513564650710706e-05,
+ "loss": 0.4113,
+ "step": 123650
+ },
+ {
+ "epoch": 0.6382177369841245,
+ "grad_norm": 22652.9140625,
+ "learning_rate": 4.547718305789984e-05,
+ "loss": 0.4224,
+ "step": 123700
+ },
+ {
+ "epoch": 0.6384757069667374,
+ "grad_norm": 25416.0390625,
+ "learning_rate": 4.5440803879281086e-05,
+ "loss": 0.4129,
+ "step": 123750
+ },
+ {
+ "epoch": 0.6387336769493501,
+ "grad_norm": 22621.40625,
+ "learning_rate": 4.5404427134272926e-05,
+ "loss": 0.4204,
+ "step": 123800
+ },
+ {
+ "epoch": 0.638991646931963,
+ "grad_norm": 24213.93359375,
+ "learning_rate": 4.536805284229258e-05,
+ "loss": 0.4109,
+ "step": 123850
+ },
+ {
+ "epoch": 0.6392496169145758,
+ "grad_norm": 20231.091796875,
+ "learning_rate": 4.5331681022755946e-05,
+ "loss": 0.4221,
+ "step": 123900
+ },
+ {
+ "epoch": 0.6395075868971887,
+ "grad_norm": 22513.21875,
+ "learning_rate": 4.529531169507757e-05,
+ "loss": 0.4189,
+ "step": 123950
+ },
+ {
+ "epoch": 0.6397655568798014,
+ "grad_norm": 19454.783203125,
+ "learning_rate": 4.5258944878670714e-05,
+ "loss": 0.4138,
+ "step": 124000
+ },
+ {
+ "epoch": 0.6400235268624143,
+ "grad_norm": 23547.423828125,
+ "learning_rate": 4.522258059294727e-05,
+ "loss": 0.4206,
+ "step": 124050
+ },
+ {
+ "epoch": 0.6402814968450271,
+ "grad_norm": 23985.0703125,
+ "learning_rate": 4.5186218857317825e-05,
+ "loss": 0.4186,
+ "step": 124100
+ },
+ {
+ "epoch": 0.64053946682764,
+ "grad_norm": 22254.078125,
+ "learning_rate": 4.5149859691191517e-05,
+ "loss": 0.4076,
+ "step": 124150
+ },
+ {
+ "epoch": 0.6407974368102528,
+ "grad_norm": 24060.70703125,
+ "learning_rate": 4.5113503113976194e-05,
+ "loss": 0.4207,
+ "step": 124200
+ },
+ {
+ "epoch": 0.6410554067928655,
+ "grad_norm": 21521.923828125,
+ "learning_rate": 4.5077149145078275e-05,
+ "loss": 0.4134,
+ "step": 124250
+ },
+ {
+ "epoch": 0.6413133767754784,
+ "grad_norm": 22107.48828125,
+ "learning_rate": 4.504079780390282e-05,
+ "loss": 0.4095,
+ "step": 124300
+ },
+ {
+ "epoch": 0.6415713467580912,
+ "grad_norm": 22610.880859375,
+ "learning_rate": 4.5004449109853485e-05,
+ "loss": 0.4216,
+ "step": 124350
+ },
+ {
+ "epoch": 0.6418293167407041,
+ "grad_norm": 22752.83984375,
+ "learning_rate": 4.496810308233247e-05,
+ "loss": 0.4225,
+ "step": 124400
+ },
+ {
+ "epoch": 0.6420872867233168,
+ "grad_norm": 22029.88671875,
+ "learning_rate": 4.4931759740740596e-05,
+ "loss": 0.4138,
+ "step": 124450
+ },
+ {
+ "epoch": 0.6423452567059297,
+ "grad_norm": 24989.2421875,
+ "learning_rate": 4.489541910447722e-05,
+ "loss": 0.4166,
+ "step": 124500
+ },
+ {
+ "epoch": 0.6426032266885425,
+ "grad_norm": 25843.16796875,
+ "learning_rate": 4.485908119294031e-05,
+ "loss": 0.4132,
+ "step": 124550
+ },
+ {
+ "epoch": 0.6428611966711554,
+ "grad_norm": 23847.01171875,
+ "learning_rate": 4.4822746025526286e-05,
+ "loss": 0.4256,
+ "step": 124600
+ },
+ {
+ "epoch": 0.6431191666537681,
+ "grad_norm": 21634.71484375,
+ "learning_rate": 4.478641362163019e-05,
+ "loss": 0.4182,
+ "step": 124650
+ },
+ {
+ "epoch": 0.643377136636381,
+ "grad_norm": 22252.021484375,
+ "learning_rate": 4.475008400064554e-05,
+ "loss": 0.419,
+ "step": 124700
+ },
+ {
+ "epoch": 0.6436351066189938,
+ "grad_norm": 24151.951171875,
+ "learning_rate": 4.471375718196439e-05,
+ "loss": 0.4201,
+ "step": 124750
+ },
+ {
+ "epoch": 0.6438930766016067,
+ "grad_norm": 23570.310546875,
+ "learning_rate": 4.4677433184977315e-05,
+ "loss": 0.4131,
+ "step": 124800
+ },
+ {
+ "epoch": 0.6441510465842195,
+ "grad_norm": 23886.896484375,
+ "learning_rate": 4.464111202907332e-05,
+ "loss": 0.4172,
+ "step": 124850
+ },
+ {
+ "epoch": 0.6444090165668322,
+ "grad_norm": 23476.888671875,
+ "learning_rate": 4.4604793733639973e-05,
+ "loss": 0.419,
+ "step": 124900
+ },
+ {
+ "epoch": 0.6446669865494451,
+ "grad_norm": 22735.759765625,
+ "learning_rate": 4.456847831806324e-05,
+ "loss": 0.4214,
+ "step": 124950
+ },
+ {
+ "epoch": 0.644924956532058,
+ "grad_norm": 25508.525390625,
+ "learning_rate": 4.4532165801727626e-05,
+ "loss": 0.4184,
+ "step": 125000
+ },
+ {
+ "epoch": 0.644924956532058,
+ "eval_loss": 0.40382638573646545,
+ "eval_runtime": 3215.6548,
+ "eval_samples_per_second": 964.382,
+ "eval_steps_per_second": 1.884,
+ "step": 125000
+ },
+ {
+ "epoch": 0.6451829265146708,
+ "grad_norm": 23686.8671875,
+ "learning_rate": 4.449585620401601e-05,
+ "loss": 0.4115,
+ "step": 125050
+ },
+ {
+ "epoch": 0.6454408964972835,
+ "grad_norm": 22472.7421875,
+ "learning_rate": 4.445954954430976e-05,
+ "loss": 0.4187,
+ "step": 125100
+ },
+ {
+ "epoch": 0.6456988664798964,
+ "grad_norm": 25044.5859375,
+ "learning_rate": 4.442324584198871e-05,
+ "loss": 0.4188,
+ "step": 125150
+ },
+ {
+ "epoch": 0.6459568364625092,
+ "grad_norm": 23489.119140625,
+ "learning_rate": 4.4386945116431025e-05,
+ "loss": 0.4212,
+ "step": 125200
+ },
+ {
+ "epoch": 0.6462148064451221,
+ "grad_norm": 23150.12109375,
+ "learning_rate": 4.435064738701335e-05,
+ "loss": 0.4155,
+ "step": 125250
+ },
+ {
+ "epoch": 0.6464727764277348,
+ "grad_norm": 22082.09765625,
+ "learning_rate": 4.4314352673110696e-05,
+ "loss": 0.4208,
+ "step": 125300
+ },
+ {
+ "epoch": 0.6467307464103477,
+ "grad_norm": 23107.71484375,
+ "learning_rate": 4.427806099409652e-05,
+ "loss": 0.4172,
+ "step": 125350
+ },
+ {
+ "epoch": 0.6469887163929605,
+ "grad_norm": 23660.607421875,
+ "learning_rate": 4.4241772369342554e-05,
+ "loss": 0.4156,
+ "step": 125400
+ },
+ {
+ "epoch": 0.6472466863755734,
+ "grad_norm": 22054.47265625,
+ "learning_rate": 4.420548681821901e-05,
+ "loss": 0.4174,
+ "step": 125450
+ },
+ {
+ "epoch": 0.6475046563581862,
+ "grad_norm": 22386.654296875,
+ "learning_rate": 4.416920436009439e-05,
+ "loss": 0.4164,
+ "step": 125500
+ },
+ {
+ "epoch": 0.647762626340799,
+ "grad_norm": 22394.78125,
+ "learning_rate": 4.413292501433557e-05,
+ "loss": 0.4128,
+ "step": 125550
+ },
+ {
+ "epoch": 0.6480205963234118,
+ "grad_norm": 21871.1953125,
+ "learning_rate": 4.4096648800307796e-05,
+ "loss": 0.4174,
+ "step": 125600
+ },
+ {
+ "epoch": 0.6482785663060247,
+ "grad_norm": 21630.826171875,
+ "learning_rate": 4.406037573737456e-05,
+ "loss": 0.4146,
+ "step": 125650
+ },
+ {
+ "epoch": 0.6485365362886375,
+ "grad_norm": 20917.244140625,
+ "learning_rate": 4.4024105844897744e-05,
+ "loss": 0.4172,
+ "step": 125700
+ },
+ {
+ "epoch": 0.6487945062712502,
+ "grad_norm": 21545.53515625,
+ "learning_rate": 4.3987839142237505e-05,
+ "loss": 0.4189,
+ "step": 125750
+ },
+ {
+ "epoch": 0.6490524762538631,
+ "grad_norm": 27708.19140625,
+ "learning_rate": 4.395157564875234e-05,
+ "loss": 0.4127,
+ "step": 125800
+ },
+ {
+ "epoch": 0.6493104462364759,
+ "grad_norm": 23791.052734375,
+ "learning_rate": 4.391531538379895e-05,
+ "loss": 0.4146,
+ "step": 125850
+ },
+ {
+ "epoch": 0.6495684162190888,
+ "grad_norm": 23441.0078125,
+ "learning_rate": 4.387905836673239e-05,
+ "loss": 0.4191,
+ "step": 125900
+ },
+ {
+ "epoch": 0.6498263862017015,
+ "grad_norm": 21998.982421875,
+ "learning_rate": 4.3842804616905944e-05,
+ "loss": 0.4165,
+ "step": 125950
+ },
+ {
+ "epoch": 0.6500843561843144,
+ "grad_norm": 26170.572265625,
+ "learning_rate": 4.380655415367116e-05,
+ "loss": 0.4106,
+ "step": 126000
+ },
+ {
+ "epoch": 0.6503423261669272,
+ "grad_norm": 23915.345703125,
+ "learning_rate": 4.3770306996377866e-05,
+ "loss": 0.417,
+ "step": 126050
+ },
+ {
+ "epoch": 0.6506002961495401,
+ "grad_norm": 22807.23828125,
+ "learning_rate": 4.373406316437404e-05,
+ "loss": 0.4138,
+ "step": 126100
+ },
+ {
+ "epoch": 0.6508582661321528,
+ "grad_norm": 22825.060546875,
+ "learning_rate": 4.369782267700598e-05,
+ "loss": 0.4159,
+ "step": 126150
+ },
+ {
+ "epoch": 0.6511162361147657,
+ "grad_norm": 21670.83984375,
+ "learning_rate": 4.366158555361812e-05,
+ "loss": 0.4131,
+ "step": 126200
+ },
+ {
+ "epoch": 0.6513742060973785,
+ "grad_norm": 24840.630859375,
+ "learning_rate": 4.362535181355319e-05,
+ "loss": 0.4072,
+ "step": 126250
+ },
+ {
+ "epoch": 0.6516321760799914,
+ "grad_norm": 24121.158203125,
+ "learning_rate": 4.358912147615199e-05,
+ "loss": 0.4085,
+ "step": 126300
+ },
+ {
+ "epoch": 0.6518901460626042,
+ "grad_norm": 21738.236328125,
+ "learning_rate": 4.355289456075363e-05,
+ "loss": 0.4154,
+ "step": 126350
+ },
+ {
+ "epoch": 0.6521481160452169,
+ "grad_norm": 24880.833984375,
+ "learning_rate": 4.3516671086695296e-05,
+ "loss": 0.4154,
+ "step": 126400
+ },
+ {
+ "epoch": 0.6524060860278298,
+ "grad_norm": 21572.140625,
+ "learning_rate": 4.348045107331239e-05,
+ "loss": 0.4185,
+ "step": 126450
+ },
+ {
+ "epoch": 0.6526640560104426,
+ "grad_norm": 24076.17578125,
+ "learning_rate": 4.344423453993849e-05,
+ "loss": 0.4132,
+ "step": 126500
+ },
+ {
+ "epoch": 0.6529220259930555,
+ "grad_norm": 23531.365234375,
+ "learning_rate": 4.340802150590522e-05,
+ "loss": 0.4179,
+ "step": 126550
+ },
+ {
+ "epoch": 0.6531799959756682,
+ "grad_norm": 24287.568359375,
+ "learning_rate": 4.337181199054243e-05,
+ "loss": 0.4136,
+ "step": 126600
+ },
+ {
+ "epoch": 0.6534379659582811,
+ "grad_norm": 23352.52734375,
+ "learning_rate": 4.3335606013178046e-05,
+ "loss": 0.4177,
+ "step": 126650
+ },
+ {
+ "epoch": 0.6536959359408939,
+ "grad_norm": 22291.494140625,
+ "learning_rate": 4.3299403593138144e-05,
+ "loss": 0.4155,
+ "step": 126700
+ },
+ {
+ "epoch": 0.6539539059235068,
+ "grad_norm": 20745.798828125,
+ "learning_rate": 4.3263204749746836e-05,
+ "loss": 0.4139,
+ "step": 126750
+ },
+ {
+ "epoch": 0.6542118759061195,
+ "grad_norm": 24670.357421875,
+ "learning_rate": 4.322700950232639e-05,
+ "loss": 0.423,
+ "step": 126800
+ },
+ {
+ "epoch": 0.6544698458887324,
+ "grad_norm": 23067.81640625,
+ "learning_rate": 4.31908178701971e-05,
+ "loss": 0.4174,
+ "step": 126850
+ },
+ {
+ "epoch": 0.6547278158713452,
+ "grad_norm": 25275.47265625,
+ "learning_rate": 4.315462987267739e-05,
+ "loss": 0.4181,
+ "step": 126900
+ },
+ {
+ "epoch": 0.6549857858539581,
+ "grad_norm": 21032.4375,
+ "learning_rate": 4.311844552908372e-05,
+ "loss": 0.4111,
+ "step": 126950
+ },
+ {
+ "epoch": 0.6552437558365709,
+ "grad_norm": 21629.0625,
+ "learning_rate": 4.308226485873056e-05,
+ "loss": 0.4129,
+ "step": 127000
+ },
+ {
+ "epoch": 0.6555017258191836,
+ "grad_norm": 24375.935546875,
+ "learning_rate": 4.3046087880930466e-05,
+ "loss": 0.4129,
+ "step": 127050
+ },
+ {
+ "epoch": 0.6557596958017965,
+ "grad_norm": 21224.63671875,
+ "learning_rate": 4.3009914614994e-05,
+ "loss": 0.4156,
+ "step": 127100
+ },
+ {
+ "epoch": 0.6560176657844093,
+ "grad_norm": 24836.560546875,
+ "learning_rate": 4.297374508022977e-05,
+ "loss": 0.4133,
+ "step": 127150
+ },
+ {
+ "epoch": 0.6562756357670222,
+ "grad_norm": 22769.599609375,
+ "learning_rate": 4.293757929594435e-05,
+ "loss": 0.4151,
+ "step": 127200
+ },
+ {
+ "epoch": 0.6565336057496349,
+ "grad_norm": 22936.603515625,
+ "learning_rate": 4.2901417281442345e-05,
+ "loss": 0.4173,
+ "step": 127250
+ },
+ {
+ "epoch": 0.6567915757322478,
+ "grad_norm": 21296.39453125,
+ "learning_rate": 4.286525905602634e-05,
+ "loss": 0.4121,
+ "step": 127300
+ },
+ {
+ "epoch": 0.6570495457148606,
+ "grad_norm": 24282.591796875,
+ "learning_rate": 4.282910463899689e-05,
+ "loss": 0.4086,
+ "step": 127350
+ },
+ {
+ "epoch": 0.6573075156974735,
+ "grad_norm": 22443.6015625,
+ "learning_rate": 4.2792954049652545e-05,
+ "loss": 0.4183,
+ "step": 127400
+ },
+ {
+ "epoch": 0.6575654856800862,
+ "grad_norm": 21437.98046875,
+ "learning_rate": 4.275680730728976e-05,
+ "loss": 0.4172,
+ "step": 127450
+ },
+ {
+ "epoch": 0.6578234556626991,
+ "grad_norm": 24970.3125,
+ "learning_rate": 4.2720664431202987e-05,
+ "loss": 0.4187,
+ "step": 127500
+ },
+ {
+ "epoch": 0.6580814256453119,
+ "grad_norm": 21128.349609375,
+ "learning_rate": 4.268452544068457e-05,
+ "loss": 0.4142,
+ "step": 127550
+ },
+ {
+ "epoch": 0.6583393956279248,
+ "grad_norm": 26429.14453125,
+ "learning_rate": 4.2648390355024836e-05,
+ "loss": 0.4115,
+ "step": 127600
+ },
+ {
+ "epoch": 0.6585973656105376,
+ "grad_norm": 22542.380859375,
+ "learning_rate": 4.261225919351195e-05,
+ "loss": 0.4144,
+ "step": 127650
+ },
+ {
+ "epoch": 0.6588553355931503,
+ "grad_norm": 23179.853515625,
+ "learning_rate": 4.257613197543207e-05,
+ "loss": 0.4164,
+ "step": 127700
+ },
+ {
+ "epoch": 0.6591133055757632,
+ "grad_norm": 24641.048828125,
+ "learning_rate": 4.254000872006918e-05,
+ "loss": 0.4175,
+ "step": 127750
+ },
+ {
+ "epoch": 0.659371275558376,
+ "grad_norm": 23836.771484375,
+ "learning_rate": 4.250388944670517e-05,
+ "loss": 0.4201,
+ "step": 127800
+ },
+ {
+ "epoch": 0.6596292455409889,
+ "grad_norm": 23714.7578125,
+ "learning_rate": 4.2467774174619836e-05,
+ "loss": 0.4102,
+ "step": 127850
+ },
+ {
+ "epoch": 0.6598872155236016,
+ "grad_norm": 23630.2890625,
+ "learning_rate": 4.2431662923090785e-05,
+ "loss": 0.411,
+ "step": 127900
+ },
+ {
+ "epoch": 0.6601451855062145,
+ "grad_norm": 23018.384765625,
+ "learning_rate": 4.239555571139353e-05,
+ "loss": 0.4113,
+ "step": 127950
+ },
+ {
+ "epoch": 0.6604031554888273,
+ "grad_norm": 23594.041015625,
+ "learning_rate": 4.235945255880137e-05,
+ "loss": 0.4153,
+ "step": 128000
+ },
+ {
+ "epoch": 0.6606611254714402,
+ "grad_norm": 24231.07421875,
+ "learning_rate": 4.232335348458549e-05,
+ "loss": 0.4159,
+ "step": 128050
+ },
+ {
+ "epoch": 0.6609190954540529,
+ "grad_norm": 22362.98828125,
+ "learning_rate": 4.228725850801486e-05,
+ "loss": 0.4218,
+ "step": 128100
+ },
+ {
+ "epoch": 0.6611770654366658,
+ "grad_norm": 23008.44140625,
+ "learning_rate": 4.225116764835631e-05,
+ "loss": 0.416,
+ "step": 128150
+ },
+ {
+ "epoch": 0.6614350354192786,
+ "grad_norm": 23027.1875,
+ "learning_rate": 4.221508092487441e-05,
+ "loss": 0.4163,
+ "step": 128200
+ },
+ {
+ "epoch": 0.6616930054018915,
+ "grad_norm": 25121.61328125,
+ "learning_rate": 4.2178998356831553e-05,
+ "loss": 0.4167,
+ "step": 128250
+ },
+ {
+ "epoch": 0.6619509753845043,
+ "grad_norm": 24767.4140625,
+ "learning_rate": 4.214291996348794e-05,
+ "loss": 0.4176,
+ "step": 128300
+ },
+ {
+ "epoch": 0.662208945367117,
+ "grad_norm": 24596.533203125,
+ "learning_rate": 4.210684576410151e-05,
+ "loss": 0.4183,
+ "step": 128350
+ },
+ {
+ "epoch": 0.6624669153497299,
+ "grad_norm": 21095.8671875,
+ "learning_rate": 4.2070775777927976e-05,
+ "loss": 0.4151,
+ "step": 128400
+ },
+ {
+ "epoch": 0.6627248853323428,
+ "grad_norm": 25389.1640625,
+ "learning_rate": 4.203471002422077e-05,
+ "loss": 0.4226,
+ "step": 128450
+ },
+ {
+ "epoch": 0.6629828553149556,
+ "grad_norm": 24613.94921875,
+ "learning_rate": 4.199864852223113e-05,
+ "loss": 0.4093,
+ "step": 128500
+ },
+ {
+ "epoch": 0.6632408252975683,
+ "grad_norm": 23665.59765625,
+ "learning_rate": 4.196259129120796e-05,
+ "loss": 0.4135,
+ "step": 128550
+ },
+ {
+ "epoch": 0.6634987952801812,
+ "grad_norm": 22946.5234375,
+ "learning_rate": 4.192653835039795e-05,
+ "loss": 0.4151,
+ "step": 128600
+ },
+ {
+ "epoch": 0.663756765262794,
+ "grad_norm": 22438.23046875,
+ "learning_rate": 4.189048971904541e-05,
+ "loss": 0.4064,
+ "step": 128650
+ },
+ {
+ "epoch": 0.6640147352454069,
+ "grad_norm": 22760.623046875,
+ "learning_rate": 4.185444541639243e-05,
+ "loss": 0.4084,
+ "step": 128700
+ },
+ {
+ "epoch": 0.6642727052280196,
+ "grad_norm": 25223.484375,
+ "learning_rate": 4.1818405461678763e-05,
+ "loss": 0.4151,
+ "step": 128750
+ },
+ {
+ "epoch": 0.6645306752106325,
+ "grad_norm": 31547.962890625,
+ "learning_rate": 4.178236987414182e-05,
+ "loss": 0.4115,
+ "step": 128800
+ },
+ {
+ "epoch": 0.6647886451932453,
+ "grad_norm": 19114.953125,
+ "learning_rate": 4.174633867301674e-05,
+ "loss": 0.4109,
+ "step": 128850
+ },
+ {
+ "epoch": 0.6650466151758582,
+ "grad_norm": 22819.888671875,
+ "learning_rate": 4.1710311877536226e-05,
+ "loss": 0.4123,
+ "step": 128900
+ },
+ {
+ "epoch": 0.6653045851584709,
+ "grad_norm": 22868.62890625,
+ "learning_rate": 4.167428950693073e-05,
+ "loss": 0.413,
+ "step": 128950
+ },
+ {
+ "epoch": 0.6655625551410838,
+ "grad_norm": 23062.359375,
+ "learning_rate": 4.163827158042826e-05,
+ "loss": 0.4152,
+ "step": 129000
+ },
+ {
+ "epoch": 0.6658205251236966,
+ "grad_norm": 25990.505859375,
+ "learning_rate": 4.160225811725453e-05,
+ "loss": 0.4176,
+ "step": 129050
+ },
+ {
+ "epoch": 0.6660784951063095,
+ "grad_norm": 21594.1953125,
+ "learning_rate": 4.156624913663279e-05,
+ "loss": 0.4136,
+ "step": 129100
+ },
+ {
+ "epoch": 0.6663364650889223,
+ "grad_norm": 21145.869140625,
+ "learning_rate": 4.153024465778393e-05,
+ "loss": 0.4216,
+ "step": 129150
+ },
+ {
+ "epoch": 0.666594435071535,
+ "grad_norm": 22634.7734375,
+ "learning_rate": 4.149424469992649e-05,
+ "loss": 0.4114,
+ "step": 129200
+ },
+ {
+ "epoch": 0.6668524050541479,
+ "grad_norm": 23526.46875,
+ "learning_rate": 4.145824928227652e-05,
+ "loss": 0.4217,
+ "step": 129250
+ },
+ {
+ "epoch": 0.6671103750367607,
+ "grad_norm": 22295.880859375,
+ "learning_rate": 4.142225842404769e-05,
+ "loss": 0.4169,
+ "step": 129300
+ },
+ {
+ "epoch": 0.6673683450193736,
+ "grad_norm": 22282.421875,
+ "learning_rate": 4.13862721444512e-05,
+ "loss": 0.4195,
+ "step": 129350
+ },
+ {
+ "epoch": 0.6676263150019863,
+ "grad_norm": 21856.337890625,
+ "learning_rate": 4.135029046269585e-05,
+ "loss": 0.4229,
+ "step": 129400
+ },
+ {
+ "epoch": 0.6678842849845992,
+ "grad_norm": 20999.04296875,
+ "learning_rate": 4.131431339798796e-05,
+ "loss": 0.4168,
+ "step": 129450
+ },
+ {
+ "epoch": 0.668142254967212,
+ "grad_norm": 24684.484375,
+ "learning_rate": 4.12783409695314e-05,
+ "loss": 0.4117,
+ "step": 129500
+ },
+ {
+ "epoch": 0.6684002249498249,
+ "grad_norm": 24120.349609375,
+ "learning_rate": 4.124237319652753e-05,
+ "loss": 0.4186,
+ "step": 129550
+ },
+ {
+ "epoch": 0.6686581949324376,
+ "grad_norm": 23283.736328125,
+ "learning_rate": 4.1206410098175265e-05,
+ "loss": 0.4176,
+ "step": 129600
+ },
+ {
+ "epoch": 0.6689161649150505,
+ "grad_norm": 21902.6875,
+ "learning_rate": 4.117045169367102e-05,
+ "loss": 0.4153,
+ "step": 129650
+ },
+ {
+ "epoch": 0.6691741348976633,
+ "grad_norm": 22762.6015625,
+ "learning_rate": 4.1134498002208674e-05,
+ "loss": 0.414,
+ "step": 129700
+ },
+ {
+ "epoch": 0.6694321048802762,
+ "grad_norm": 20947.083984375,
+ "learning_rate": 4.109854904297965e-05,
+ "loss": 0.4113,
+ "step": 129750
+ },
+ {
+ "epoch": 0.669690074862889,
+ "grad_norm": 24687.189453125,
+ "learning_rate": 4.106260483517276e-05,
+ "loss": 0.4207,
+ "step": 129800
+ },
+ {
+ "epoch": 0.6699480448455017,
+ "grad_norm": 24164.724609375,
+ "learning_rate": 4.102666539797435e-05,
+ "loss": 0.4116,
+ "step": 129850
+ },
+ {
+ "epoch": 0.6702060148281146,
+ "grad_norm": 23408.68359375,
+ "learning_rate": 4.099073075056818e-05,
+ "loss": 0.4181,
+ "step": 129900
+ },
+ {
+ "epoch": 0.6704639848107274,
+ "grad_norm": 22822.3515625,
+ "learning_rate": 4.0954800912135516e-05,
+ "loss": 0.4176,
+ "step": 129950
+ },
+ {
+ "epoch": 0.6707219547933403,
+ "grad_norm": 21576.173828125,
+ "learning_rate": 4.091887590185494e-05,
+ "loss": 0.4165,
+ "step": 130000
+ },
+ {
+ "epoch": 0.6707219547933403,
+ "eval_loss": 0.40186887979507446,
+ "eval_runtime": 3150.7117,
+ "eval_samples_per_second": 984.26,
+ "eval_steps_per_second": 1.922,
+ "step": 130000
+ },
+ {
+ "epoch": 0.670979924775953,
+ "grad_norm": 21987.3671875,
+ "learning_rate": 4.0882955738902576e-05,
+ "loss": 0.4176,
+ "step": 130050
+ },
+ {
+ "epoch": 0.6712378947585659,
+ "grad_norm": 23900.74609375,
+ "learning_rate": 4.0847040442451895e-05,
+ "loss": 0.4183,
+ "step": 130100
+ },
+ {
+ "epoch": 0.6714958647411787,
+ "grad_norm": 22624.236328125,
+ "learning_rate": 4.081113003167378e-05,
+ "loss": 0.4146,
+ "step": 130150
+ },
+ {
+ "epoch": 0.6717538347237916,
+ "grad_norm": 22636.490234375,
+ "learning_rate": 4.0775224525736546e-05,
+ "loss": 0.4107,
+ "step": 130200
+ },
+ {
+ "epoch": 0.6720118047064043,
+ "grad_norm": 22667.66796875,
+ "learning_rate": 4.07393239438058e-05,
+ "loss": 0.4151,
+ "step": 130250
+ },
+ {
+ "epoch": 0.6722697746890172,
+ "grad_norm": 20381.720703125,
+ "learning_rate": 4.070342830504465e-05,
+ "loss": 0.4167,
+ "step": 130300
+ },
+ {
+ "epoch": 0.67252774467163,
+ "grad_norm": 22913.248046875,
+ "learning_rate": 4.0667537628613424e-05,
+ "loss": 0.4116,
+ "step": 130350
+ },
+ {
+ "epoch": 0.6727857146542429,
+ "grad_norm": 23168.865234375,
+ "learning_rate": 4.063165193366992e-05,
+ "loss": 0.413,
+ "step": 130400
+ },
+ {
+ "epoch": 0.6730436846368557,
+ "grad_norm": 21597.861328125,
+ "learning_rate": 4.059577123936918e-05,
+ "loss": 0.4179,
+ "step": 130450
+ },
+ {
+ "epoch": 0.6733016546194684,
+ "grad_norm": 20305.806640625,
+ "learning_rate": 4.055989556486365e-05,
+ "loss": 0.4199,
+ "step": 130500
+ },
+ {
+ "epoch": 0.6735596246020813,
+ "grad_norm": 23520.173828125,
+ "learning_rate": 4.052402492930311e-05,
+ "loss": 0.4154,
+ "step": 130550
+ },
+ {
+ "epoch": 0.6738175945846941,
+ "grad_norm": 23356.85546875,
+ "learning_rate": 4.048815935183453e-05,
+ "loss": 0.4154,
+ "step": 130600
+ },
+ {
+ "epoch": 0.674075564567307,
+ "grad_norm": 22958.611328125,
+ "learning_rate": 4.0452298851602324e-05,
+ "loss": 0.4149,
+ "step": 130650
+ },
+ {
+ "epoch": 0.6743335345499197,
+ "grad_norm": 24888.25390625,
+ "learning_rate": 4.04164434477481e-05,
+ "loss": 0.4166,
+ "step": 130700
+ },
+ {
+ "epoch": 0.6745915045325326,
+ "grad_norm": 22958.189453125,
+ "learning_rate": 4.0380593159410806e-05,
+ "loss": 0.4159,
+ "step": 130750
+ },
+ {
+ "epoch": 0.6748494745151454,
+ "grad_norm": 21863.55859375,
+ "learning_rate": 4.03447480057266e-05,
+ "loss": 0.4142,
+ "step": 130800
+ },
+ {
+ "epoch": 0.6751074444977583,
+ "grad_norm": 23096.375,
+ "learning_rate": 4.030890800582895e-05,
+ "loss": 0.4108,
+ "step": 130850
+ },
+ {
+ "epoch": 0.675365414480371,
+ "grad_norm": 23506.576171875,
+ "learning_rate": 4.027307317884854e-05,
+ "loss": 0.4111,
+ "step": 130900
+ },
+ {
+ "epoch": 0.6756233844629839,
+ "grad_norm": 26913.11328125,
+ "learning_rate": 4.023724354391331e-05,
+ "loss": 0.4145,
+ "step": 130950
+ },
+ {
+ "epoch": 0.6758813544455967,
+ "grad_norm": 22008.958984375,
+ "learning_rate": 4.020141912014846e-05,
+ "loss": 0.4118,
+ "step": 131000
+ },
+ {
+ "epoch": 0.6761393244282096,
+ "grad_norm": 21431.857421875,
+ "learning_rate": 4.016559992667632e-05,
+ "loss": 0.417,
+ "step": 131050
+ },
+ {
+ "epoch": 0.6763972944108223,
+ "grad_norm": 24077.453125,
+ "learning_rate": 4.0129785982616524e-05,
+ "loss": 0.4121,
+ "step": 131100
+ },
+ {
+ "epoch": 0.6766552643934352,
+ "grad_norm": 22978.5390625,
+ "learning_rate": 4.009397730708583e-05,
+ "loss": 0.4074,
+ "step": 131150
+ },
+ {
+ "epoch": 0.676913234376048,
+ "grad_norm": 25474.740234375,
+ "learning_rate": 4.005817391919826e-05,
+ "loss": 0.4159,
+ "step": 131200
+ },
+ {
+ "epoch": 0.6771712043586608,
+ "grad_norm": 23532.416015625,
+ "learning_rate": 4.0022375838064904e-05,
+ "loss": 0.4202,
+ "step": 131250
+ },
+ {
+ "epoch": 0.6774291743412737,
+ "grad_norm": 23746.072265625,
+ "learning_rate": 3.998658308279414e-05,
+ "loss": 0.4157,
+ "step": 131300
+ },
+ {
+ "epoch": 0.6776871443238864,
+ "grad_norm": 21691.6875,
+ "learning_rate": 3.995079567249142e-05,
+ "loss": 0.4158,
+ "step": 131350
+ },
+ {
+ "epoch": 0.6779451143064993,
+ "grad_norm": 24167.923828125,
+ "learning_rate": 3.991501362625937e-05,
+ "loss": 0.4165,
+ "step": 131400
+ },
+ {
+ "epoch": 0.6782030842891121,
+ "grad_norm": 22420.27734375,
+ "learning_rate": 3.9879236963197784e-05,
+ "loss": 0.418,
+ "step": 131450
+ },
+ {
+ "epoch": 0.678461054271725,
+ "grad_norm": 22116.75,
+ "learning_rate": 3.984346570240352e-05,
+ "loss": 0.4152,
+ "step": 131500
+ },
+ {
+ "epoch": 0.6787190242543377,
+ "grad_norm": 23841.001953125,
+ "learning_rate": 3.9807699862970596e-05,
+ "loss": 0.4179,
+ "step": 131550
+ },
+ {
+ "epoch": 0.6789769942369506,
+ "grad_norm": 22931.126953125,
+ "learning_rate": 3.977193946399011e-05,
+ "loss": 0.4171,
+ "step": 131600
+ },
+ {
+ "epoch": 0.6792349642195634,
+ "grad_norm": 24939.294921875,
+ "learning_rate": 3.973618452455031e-05,
+ "loss": 0.4147,
+ "step": 131650
+ },
+ {
+ "epoch": 0.6794929342021763,
+ "grad_norm": 22026.615234375,
+ "learning_rate": 3.970043506373644e-05,
+ "loss": 0.4084,
+ "step": 131700
+ },
+ {
+ "epoch": 0.679750904184789,
+ "grad_norm": 24636.595703125,
+ "learning_rate": 3.9664691100630904e-05,
+ "loss": 0.4137,
+ "step": 131750
+ },
+ {
+ "epoch": 0.6800088741674019,
+ "grad_norm": 25599.443359375,
+ "learning_rate": 3.962895265431311e-05,
+ "loss": 0.4167,
+ "step": 131800
+ },
+ {
+ "epoch": 0.6802668441500147,
+ "grad_norm": 23514.0078125,
+ "learning_rate": 3.9593219743859575e-05,
+ "loss": 0.408,
+ "step": 131850
+ },
+ {
+ "epoch": 0.6805248141326276,
+ "grad_norm": 21798.9609375,
+ "learning_rate": 3.9557492388343844e-05,
+ "loss": 0.4129,
+ "step": 131900
+ },
+ {
+ "epoch": 0.6807827841152404,
+ "grad_norm": 24803.248046875,
+ "learning_rate": 3.952177060683644e-05,
+ "loss": 0.4126,
+ "step": 131950
+ },
+ {
+ "epoch": 0.6810407540978531,
+ "grad_norm": 23215.529296875,
+ "learning_rate": 3.948605441840501e-05,
+ "loss": 0.4114,
+ "step": 132000
+ },
+ {
+ "epoch": 0.681298724080466,
+ "grad_norm": 21179.626953125,
+ "learning_rate": 3.945034384211412e-05,
+ "loss": 0.4139,
+ "step": 132050
+ },
+ {
+ "epoch": 0.6815566940630788,
+ "grad_norm": 22894.04296875,
+ "learning_rate": 3.941463889702543e-05,
+ "loss": 0.4144,
+ "step": 132100
+ },
+ {
+ "epoch": 0.6818146640456917,
+ "grad_norm": 22581.392578125,
+ "learning_rate": 3.937893960219751e-05,
+ "loss": 0.4163,
+ "step": 132150
+ },
+ {
+ "epoch": 0.6820726340283044,
+ "grad_norm": 27557.634765625,
+ "learning_rate": 3.9343245976685966e-05,
+ "loss": 0.4194,
+ "step": 132200
+ },
+ {
+ "epoch": 0.6823306040109173,
+ "grad_norm": 24157.97265625,
+ "learning_rate": 3.9307558039543355e-05,
+ "loss": 0.4089,
+ "step": 132250
+ },
+ {
+ "epoch": 0.6825885739935301,
+ "grad_norm": 23363.904296875,
+ "learning_rate": 3.927187580981922e-05,
+ "loss": 0.4108,
+ "step": 132300
+ },
+ {
+ "epoch": 0.682846543976143,
+ "grad_norm": 24005.15625,
+ "learning_rate": 3.9236199306560054e-05,
+ "loss": 0.4103,
+ "step": 132350
+ },
+ {
+ "epoch": 0.6831045139587557,
+ "grad_norm": 23476.4609375,
+ "learning_rate": 3.920052854880925e-05,
+ "loss": 0.4189,
+ "step": 132400
+ },
+ {
+ "epoch": 0.6833624839413686,
+ "grad_norm": 23734.173828125,
+ "learning_rate": 3.91648635556072e-05,
+ "loss": 0.4183,
+ "step": 132450
+ },
+ {
+ "epoch": 0.6836204539239814,
+ "grad_norm": 22112.642578125,
+ "learning_rate": 3.912920434599117e-05,
+ "loss": 0.4139,
+ "step": 132500
+ },
+ {
+ "epoch": 0.6838784239065943,
+ "grad_norm": 23442.96484375,
+ "learning_rate": 3.909355093899537e-05,
+ "loss": 0.4137,
+ "step": 132550
+ },
+ {
+ "epoch": 0.6841363938892071,
+ "grad_norm": 22873.734375,
+ "learning_rate": 3.905790335365087e-05,
+ "loss": 0.4097,
+ "step": 132600
+ },
+ {
+ "epoch": 0.6843943638718198,
+ "grad_norm": 24382.9140625,
+ "learning_rate": 3.902226160898567e-05,
+ "loss": 0.4134,
+ "step": 132650
+ },
+ {
+ "epoch": 0.6846523338544327,
+ "grad_norm": 23238.1953125,
+ "learning_rate": 3.898662572402468e-05,
+ "loss": 0.4137,
+ "step": 132700
+ },
+ {
+ "epoch": 0.6849103038370455,
+ "grad_norm": 21690.37890625,
+ "learning_rate": 3.89509957177896e-05,
+ "loss": 0.4114,
+ "step": 132750
+ },
+ {
+ "epoch": 0.6851682738196584,
+ "grad_norm": 25762.189453125,
+ "learning_rate": 3.891537160929907e-05,
+ "loss": 0.4134,
+ "step": 132800
+ },
+ {
+ "epoch": 0.6854262438022711,
+ "grad_norm": 22006.044921875,
+ "learning_rate": 3.88797534175685e-05,
+ "loss": 0.4132,
+ "step": 132850
+ },
+ {
+ "epoch": 0.685684213784884,
+ "grad_norm": 22149.5546875,
+ "learning_rate": 3.8844141161610256e-05,
+ "loss": 0.4154,
+ "step": 132900
+ },
+ {
+ "epoch": 0.6859421837674968,
+ "grad_norm": 23865.419921875,
+ "learning_rate": 3.880853486043343e-05,
+ "loss": 0.4135,
+ "step": 132950
+ },
+ {
+ "epoch": 0.6862001537501097,
+ "grad_norm": 22708.126953125,
+ "learning_rate": 3.877293453304399e-05,
+ "loss": 0.4143,
+ "step": 133000
+ },
+ {
+ "epoch": 0.6864581237327224,
+ "grad_norm": 19948.517578125,
+ "learning_rate": 3.8737340198444683e-05,
+ "loss": 0.4181,
+ "step": 133050
+ },
+ {
+ "epoch": 0.6867160937153353,
+ "grad_norm": 22594.826171875,
+ "learning_rate": 3.870175187563509e-05,
+ "loss": 0.4108,
+ "step": 133100
+ },
+ {
+ "epoch": 0.6869740636979481,
+ "grad_norm": 24876.56640625,
+ "learning_rate": 3.866616958361159e-05,
+ "loss": 0.4136,
+ "step": 133150
+ },
+ {
+ "epoch": 0.687232033680561,
+ "grad_norm": 20055.0859375,
+ "learning_rate": 3.8630593341367285e-05,
+ "loss": 0.4176,
+ "step": 133200
+ },
+ {
+ "epoch": 0.6874900036631737,
+ "grad_norm": 24807.9140625,
+ "learning_rate": 3.8595023167892096e-05,
+ "loss": 0.4084,
+ "step": 133250
+ },
+ {
+ "epoch": 0.6877479736457865,
+ "grad_norm": 21060.78125,
+ "learning_rate": 3.8559459082172696e-05,
+ "loss": 0.4086,
+ "step": 133300
+ },
+ {
+ "epoch": 0.6880059436283994,
+ "grad_norm": 22740.255859375,
+ "learning_rate": 3.852390110319252e-05,
+ "loss": 0.4109,
+ "step": 133350
+ },
+ {
+ "epoch": 0.6882639136110122,
+ "grad_norm": 24095.68359375,
+ "learning_rate": 3.848834924993169e-05,
+ "loss": 0.4118,
+ "step": 133400
+ },
+ {
+ "epoch": 0.6885218835936251,
+ "grad_norm": 20011.78125,
+ "learning_rate": 3.8452803541367136e-05,
+ "loss": 0.4133,
+ "step": 133450
+ },
+ {
+ "epoch": 0.6887798535762378,
+ "grad_norm": 21369.7265625,
+ "learning_rate": 3.8417263996472444e-05,
+ "loss": 0.4104,
+ "step": 133500
+ },
+ {
+ "epoch": 0.6890378235588507,
+ "grad_norm": 22532.251953125,
+ "learning_rate": 3.8381730634217946e-05,
+ "loss": 0.415,
+ "step": 133550
+ },
+ {
+ "epoch": 0.6892957935414635,
+ "grad_norm": 21174.34765625,
+ "learning_rate": 3.8346203473570677e-05,
+ "loss": 0.4121,
+ "step": 133600
+ },
+ {
+ "epoch": 0.6895537635240764,
+ "grad_norm": 21758.87109375,
+ "learning_rate": 3.831068253349431e-05,
+ "loss": 0.4181,
+ "step": 133650
+ },
+ {
+ "epoch": 0.6898117335066891,
+ "grad_norm": 21809.083984375,
+ "learning_rate": 3.827516783294927e-05,
+ "loss": 0.41,
+ "step": 133700
+ },
+ {
+ "epoch": 0.690069703489302,
+ "grad_norm": 21419.69921875,
+ "learning_rate": 3.8239659390892593e-05,
+ "loss": 0.4166,
+ "step": 133750
+ },
+ {
+ "epoch": 0.6903276734719148,
+ "grad_norm": 20746.517578125,
+ "learning_rate": 3.820415722627802e-05,
+ "loss": 0.4168,
+ "step": 133800
+ },
+ {
+ "epoch": 0.6905856434545277,
+ "grad_norm": 22737.89453125,
+ "learning_rate": 3.816866135805589e-05,
+ "loss": 0.4119,
+ "step": 133850
+ },
+ {
+ "epoch": 0.6908436134371404,
+ "grad_norm": 23691.408203125,
+ "learning_rate": 3.813317180517324e-05,
+ "loss": 0.4105,
+ "step": 133900
+ },
+ {
+ "epoch": 0.6911015834197533,
+ "grad_norm": 22899.70703125,
+ "learning_rate": 3.8097688586573684e-05,
+ "loss": 0.412,
+ "step": 133950
+ },
+ {
+ "epoch": 0.6913595534023661,
+ "grad_norm": 25553.763671875,
+ "learning_rate": 3.8062211721197475e-05,
+ "loss": 0.4158,
+ "step": 134000
+ },
+ {
+ "epoch": 0.691617523384979,
+ "grad_norm": 22099.93359375,
+ "learning_rate": 3.802674122798152e-05,
+ "loss": 0.4149,
+ "step": 134050
+ },
+ {
+ "epoch": 0.6918754933675918,
+ "grad_norm": 25735.91015625,
+ "learning_rate": 3.799127712585922e-05,
+ "loss": 0.4058,
+ "step": 134100
+ },
+ {
+ "epoch": 0.6921334633502045,
+ "grad_norm": 21259.95703125,
+ "learning_rate": 3.795581943376067e-05,
+ "loss": 0.4192,
+ "step": 134150
+ },
+ {
+ "epoch": 0.6923914333328174,
+ "grad_norm": 22438.23046875,
+ "learning_rate": 3.7920368170612476e-05,
+ "loss": 0.414,
+ "step": 134200
+ },
+ {
+ "epoch": 0.6926494033154302,
+ "grad_norm": 24721.974609375,
+ "learning_rate": 3.788492335533786e-05,
+ "loss": 0.4154,
+ "step": 134250
+ },
+ {
+ "epoch": 0.6929073732980431,
+ "grad_norm": 24267.611328125,
+ "learning_rate": 3.7849485006856545e-05,
+ "loss": 0.4108,
+ "step": 134300
+ },
+ {
+ "epoch": 0.6931653432806558,
+ "grad_norm": 25588.193359375,
+ "learning_rate": 3.781405314408486e-05,
+ "loss": 0.4169,
+ "step": 134350
+ },
+ {
+ "epoch": 0.6934233132632687,
+ "grad_norm": 22651.216796875,
+ "learning_rate": 3.7778627785935626e-05,
+ "loss": 0.4112,
+ "step": 134400
+ },
+ {
+ "epoch": 0.6936812832458815,
+ "grad_norm": 24765.76953125,
+ "learning_rate": 3.774320895131823e-05,
+ "loss": 0.4173,
+ "step": 134450
+ },
+ {
+ "epoch": 0.6939392532284944,
+ "grad_norm": 25384.44921875,
+ "learning_rate": 3.7707796659138584e-05,
+ "loss": 0.4097,
+ "step": 134500
+ },
+ {
+ "epoch": 0.6941972232111071,
+ "grad_norm": 21145.587890625,
+ "learning_rate": 3.767239092829903e-05,
+ "loss": 0.4125,
+ "step": 134550
+ },
+ {
+ "epoch": 0.69445519319372,
+ "grad_norm": 22693.28515625,
+ "learning_rate": 3.763699177769849e-05,
+ "loss": 0.4111,
+ "step": 134600
+ },
+ {
+ "epoch": 0.6947131631763328,
+ "grad_norm": 20415.33984375,
+ "learning_rate": 3.760159922623235e-05,
+ "loss": 0.4178,
+ "step": 134650
+ },
+ {
+ "epoch": 0.6949711331589457,
+ "grad_norm": 23304.33984375,
+ "learning_rate": 3.756621329279247e-05,
+ "loss": 0.4142,
+ "step": 134700
+ },
+ {
+ "epoch": 0.6952291031415585,
+ "grad_norm": 22485.029296875,
+ "learning_rate": 3.7530833996267156e-05,
+ "loss": 0.4129,
+ "step": 134750
+ },
+ {
+ "epoch": 0.6954870731241712,
+ "grad_norm": 20506.5625,
+ "learning_rate": 3.7495461355541206e-05,
+ "loss": 0.4104,
+ "step": 134800
+ },
+ {
+ "epoch": 0.6957450431067841,
+ "grad_norm": 26106.26953125,
+ "learning_rate": 3.746009538949584e-05,
+ "loss": 0.4122,
+ "step": 134850
+ },
+ {
+ "epoch": 0.6960030130893969,
+ "grad_norm": 25230.55859375,
+ "learning_rate": 3.742473611700874e-05,
+ "loss": 0.4173,
+ "step": 134900
+ },
+ {
+ "epoch": 0.6962609830720098,
+ "grad_norm": 23462.197265625,
+ "learning_rate": 3.738938355695402e-05,
+ "loss": 0.4211,
+ "step": 134950
+ },
+ {
+ "epoch": 0.6965189530546225,
+ "grad_norm": 22550.8359375,
+ "learning_rate": 3.735403772820213e-05,
+ "loss": 0.4154,
+ "step": 135000
+ },
+ {
+ "epoch": 0.6965189530546225,
+ "eval_loss": 0.399837851524353,
+ "eval_runtime": 3136.0222,
+ "eval_samples_per_second": 988.871,
+ "eval_steps_per_second": 1.931,
+ "step": 135000
+ },
+ {
+ "epoch": 0.6967769230372354,
+ "grad_norm": 22235.15234375,
+ "learning_rate": 3.731869864962004e-05,
+ "loss": 0.4183,
+ "step": 135050
+ },
+ {
+ "epoch": 0.6970348930198482,
+ "grad_norm": 21969.208984375,
+ "learning_rate": 3.728336634007105e-05,
+ "loss": 0.41,
+ "step": 135100
+ },
+ {
+ "epoch": 0.6972928630024611,
+ "grad_norm": 22907.32421875,
+ "learning_rate": 3.724804081841488e-05,
+ "loss": 0.4213,
+ "step": 135150
+ },
+ {
+ "epoch": 0.6975508329850738,
+ "grad_norm": 22994.646484375,
+ "learning_rate": 3.721272210350757e-05,
+ "loss": 0.4103,
+ "step": 135200
+ },
+ {
+ "epoch": 0.6978088029676867,
+ "grad_norm": 22118.224609375,
+ "learning_rate": 3.717741021420162e-05,
+ "loss": 0.4195,
+ "step": 135250
+ },
+ {
+ "epoch": 0.6980667729502995,
+ "grad_norm": 19673.6484375,
+ "learning_rate": 3.7142105169345764e-05,
+ "loss": 0.4105,
+ "step": 135300
+ },
+ {
+ "epoch": 0.6983247429329124,
+ "grad_norm": 23110.041015625,
+ "learning_rate": 3.71068069877852e-05,
+ "loss": 0.4132,
+ "step": 135350
+ },
+ {
+ "epoch": 0.6985827129155251,
+ "grad_norm": 26589.453125,
+ "learning_rate": 3.707151568836144e-05,
+ "loss": 0.4171,
+ "step": 135400
+ },
+ {
+ "epoch": 0.6988406828981379,
+ "grad_norm": 25272.74609375,
+ "learning_rate": 3.7036231289912206e-05,
+ "loss": 0.4098,
+ "step": 135450
+ },
+ {
+ "epoch": 0.6990986528807508,
+ "grad_norm": 23238.626953125,
+ "learning_rate": 3.700095381127172e-05,
+ "loss": 0.4102,
+ "step": 135500
+ },
+ {
+ "epoch": 0.6993566228633636,
+ "grad_norm": 25412.8203125,
+ "learning_rate": 3.696568327127036e-05,
+ "loss": 0.4131,
+ "step": 135550
+ },
+ {
+ "epoch": 0.6996145928459765,
+ "grad_norm": 22329.0703125,
+ "learning_rate": 3.693041968873488e-05,
+ "loss": 0.4196,
+ "step": 135600
+ },
+ {
+ "epoch": 0.6998725628285892,
+ "grad_norm": 23497.068359375,
+ "learning_rate": 3.6895163082488294e-05,
+ "loss": 0.4137,
+ "step": 135650
+ },
+ {
+ "epoch": 0.7001305328112021,
+ "grad_norm": 23415.0859375,
+ "learning_rate": 3.6859913471349906e-05,
+ "loss": 0.4088,
+ "step": 135700
+ },
+ {
+ "epoch": 0.7003885027938149,
+ "grad_norm": 24474.064453125,
+ "learning_rate": 3.682467087413525e-05,
+ "loss": 0.4122,
+ "step": 135750
+ },
+ {
+ "epoch": 0.7006464727764278,
+ "grad_norm": 24427.3359375,
+ "learning_rate": 3.678943530965615e-05,
+ "loss": 0.4133,
+ "step": 135800
+ },
+ {
+ "epoch": 0.7009044427590405,
+ "grad_norm": 24399.58203125,
+ "learning_rate": 3.675420679672068e-05,
+ "loss": 0.4113,
+ "step": 135850
+ },
+ {
+ "epoch": 0.7011624127416534,
+ "grad_norm": 22070.033203125,
+ "learning_rate": 3.671898535413313e-05,
+ "loss": 0.4099,
+ "step": 135900
+ },
+ {
+ "epoch": 0.7014203827242662,
+ "grad_norm": 21846.20703125,
+ "learning_rate": 3.668377100069404e-05,
+ "loss": 0.4164,
+ "step": 135950
+ },
+ {
+ "epoch": 0.7016783527068791,
+ "grad_norm": 21927.2265625,
+ "learning_rate": 3.664856375520012e-05,
+ "loss": 0.4124,
+ "step": 136000
+ },
+ {
+ "epoch": 0.7019363226894918,
+ "grad_norm": 22155.341796875,
+ "learning_rate": 3.6613363636444344e-05,
+ "loss": 0.416,
+ "step": 136050
+ },
+ {
+ "epoch": 0.7021942926721046,
+ "grad_norm": 23344.486328125,
+ "learning_rate": 3.6578170663215826e-05,
+ "loss": 0.4162,
+ "step": 136100
+ },
+ {
+ "epoch": 0.7024522626547175,
+ "grad_norm": 23390.642578125,
+ "learning_rate": 3.6542984854299936e-05,
+ "loss": 0.4082,
+ "step": 136150
+ },
+ {
+ "epoch": 0.7027102326373303,
+ "grad_norm": 22980.90625,
+ "learning_rate": 3.6507806228478125e-05,
+ "loss": 0.4067,
+ "step": 136200
+ },
+ {
+ "epoch": 0.7029682026199432,
+ "grad_norm": 22321.662109375,
+ "learning_rate": 3.6472634804528095e-05,
+ "loss": 0.4129,
+ "step": 136250
+ },
+ {
+ "epoch": 0.7032261726025559,
+ "grad_norm": 22719.455078125,
+ "learning_rate": 3.643747060122366e-05,
+ "loss": 0.4169,
+ "step": 136300
+ },
+ {
+ "epoch": 0.7034841425851688,
+ "grad_norm": 25283.494140625,
+ "learning_rate": 3.640231363733481e-05,
+ "loss": 0.4081,
+ "step": 136350
+ },
+ {
+ "epoch": 0.7037421125677816,
+ "grad_norm": 24430.919921875,
+ "learning_rate": 3.636716393162764e-05,
+ "loss": 0.4187,
+ "step": 136400
+ },
+ {
+ "epoch": 0.7040000825503945,
+ "grad_norm": 23372.662109375,
+ "learning_rate": 3.633202150286435e-05,
+ "loss": 0.4117,
+ "step": 136450
+ },
+ {
+ "epoch": 0.7042580525330072,
+ "grad_norm": 23912.595703125,
+ "learning_rate": 3.6296886369803346e-05,
+ "loss": 0.4126,
+ "step": 136500
+ },
+ {
+ "epoch": 0.7045160225156201,
+ "grad_norm": 24092.0390625,
+ "learning_rate": 3.626175855119903e-05,
+ "loss": 0.4163,
+ "step": 136550
+ },
+ {
+ "epoch": 0.7047739924982329,
+ "grad_norm": 23452.2421875,
+ "learning_rate": 3.6226638065802e-05,
+ "loss": 0.4088,
+ "step": 136600
+ },
+ {
+ "epoch": 0.7050319624808458,
+ "grad_norm": 24399.787109375,
+ "learning_rate": 3.6191524932358845e-05,
+ "loss": 0.4139,
+ "step": 136650
+ },
+ {
+ "epoch": 0.7052899324634585,
+ "grad_norm": 23295.599609375,
+ "learning_rate": 3.6156419169612287e-05,
+ "loss": 0.4112,
+ "step": 136700
+ },
+ {
+ "epoch": 0.7055479024460714,
+ "grad_norm": 25809.876953125,
+ "learning_rate": 3.6121320796301126e-05,
+ "loss": 0.4141,
+ "step": 136750
+ },
+ {
+ "epoch": 0.7058058724286842,
+ "grad_norm": 21679.818359375,
+ "learning_rate": 3.608622983116018e-05,
+ "loss": 0.4183,
+ "step": 136800
+ },
+ {
+ "epoch": 0.706063842411297,
+ "grad_norm": 24492.578125,
+ "learning_rate": 3.6051146292920334e-05,
+ "loss": 0.4103,
+ "step": 136850
+ },
+ {
+ "epoch": 0.7063218123939099,
+ "grad_norm": 24805.59375,
+ "learning_rate": 3.601607020030847e-05,
+ "loss": 0.4129,
+ "step": 136900
+ },
+ {
+ "epoch": 0.7065797823765226,
+ "grad_norm": 23000.9765625,
+ "learning_rate": 3.5981001572047566e-05,
+ "loss": 0.4091,
+ "step": 136950
+ },
+ {
+ "epoch": 0.7068377523591355,
+ "grad_norm": 24590.6875,
+ "learning_rate": 3.594594042685655e-05,
+ "loss": 0.4061,
+ "step": 137000
+ },
+ {
+ "epoch": 0.7070957223417483,
+ "grad_norm": 22223.16015625,
+ "learning_rate": 3.5910886783450416e-05,
+ "loss": 0.4174,
+ "step": 137050
+ },
+ {
+ "epoch": 0.7073536923243612,
+ "grad_norm": 28207.7578125,
+ "learning_rate": 3.587584066054007e-05,
+ "loss": 0.4119,
+ "step": 137100
+ },
+ {
+ "epoch": 0.7076116623069739,
+ "grad_norm": 23703.271484375,
+ "learning_rate": 3.584080207683249e-05,
+ "loss": 0.4104,
+ "step": 137150
+ },
+ {
+ "epoch": 0.7078696322895868,
+ "grad_norm": 24903.92578125,
+ "learning_rate": 3.580577105103059e-05,
+ "loss": 0.4139,
+ "step": 137200
+ },
+ {
+ "epoch": 0.7081276022721996,
+ "grad_norm": 21130.029296875,
+ "learning_rate": 3.5770747601833235e-05,
+ "loss": 0.4208,
+ "step": 137250
+ },
+ {
+ "epoch": 0.7083855722548125,
+ "grad_norm": 22223.611328125,
+ "learning_rate": 3.5735731747935306e-05,
+ "loss": 0.4118,
+ "step": 137300
+ },
+ {
+ "epoch": 0.7086435422374252,
+ "grad_norm": 21862.12109375,
+ "learning_rate": 3.570072350802753e-05,
+ "loss": 0.4101,
+ "step": 137350
+ },
+ {
+ "epoch": 0.708901512220038,
+ "grad_norm": 22504.25390625,
+ "learning_rate": 3.566572290079667e-05,
+ "loss": 0.4187,
+ "step": 137400
+ },
+ {
+ "epoch": 0.7091594822026509,
+ "grad_norm": 21898.53125,
+ "learning_rate": 3.563072994492535e-05,
+ "loss": 0.4068,
+ "step": 137450
+ },
+ {
+ "epoch": 0.7094174521852638,
+ "grad_norm": 21629.5859375,
+ "learning_rate": 3.559574465909215e-05,
+ "loss": 0.4107,
+ "step": 137500
+ },
+ {
+ "epoch": 0.7096754221678765,
+ "grad_norm": 23078.080078125,
+ "learning_rate": 3.5560767061971515e-05,
+ "loss": 0.4093,
+ "step": 137550
+ },
+ {
+ "epoch": 0.7099333921504893,
+ "grad_norm": 21831.11328125,
+ "learning_rate": 3.5525797172233826e-05,
+ "loss": 0.4083,
+ "step": 137600
+ },
+ {
+ "epoch": 0.7101913621331022,
+ "grad_norm": 20934.220703125,
+ "learning_rate": 3.5490835008545334e-05,
+ "loss": 0.4143,
+ "step": 137650
+ },
+ {
+ "epoch": 0.710449332115715,
+ "grad_norm": 21335.0,
+ "learning_rate": 3.545588058956816e-05,
+ "loss": 0.4104,
+ "step": 137700
+ },
+ {
+ "epoch": 0.7107073020983279,
+ "grad_norm": 20424.279296875,
+ "learning_rate": 3.542093393396031e-05,
+ "loss": 0.4117,
+ "step": 137750
+ },
+ {
+ "epoch": 0.7109652720809406,
+ "grad_norm": 24527.76171875,
+ "learning_rate": 3.5385995060375596e-05,
+ "loss": 0.4128,
+ "step": 137800
+ },
+ {
+ "epoch": 0.7112232420635535,
+ "grad_norm": 23370.17578125,
+ "learning_rate": 3.535106398746376e-05,
+ "loss": 0.4149,
+ "step": 137850
+ },
+ {
+ "epoch": 0.7114812120461663,
+ "grad_norm": 22996.2890625,
+ "learning_rate": 3.531614073387028e-05,
+ "loss": 0.412,
+ "step": 137900
+ },
+ {
+ "epoch": 0.7117391820287792,
+ "grad_norm": 26592.931640625,
+ "learning_rate": 3.528122531823657e-05,
+ "loss": 0.4111,
+ "step": 137950
+ },
+ {
+ "epoch": 0.7119971520113919,
+ "grad_norm": 22353.35546875,
+ "learning_rate": 3.5246317759199745e-05,
+ "loss": 0.412,
+ "step": 138000
+ },
+ {
+ "epoch": 0.7122551219940048,
+ "grad_norm": 22266.91796875,
+ "learning_rate": 3.521141807539281e-05,
+ "loss": 0.4113,
+ "step": 138050
+ },
+ {
+ "epoch": 0.7125130919766176,
+ "grad_norm": 21723.318359375,
+ "learning_rate": 3.517652628544457e-05,
+ "loss": 0.4058,
+ "step": 138100
+ },
+ {
+ "epoch": 0.7127710619592305,
+ "grad_norm": 23738.322265625,
+ "learning_rate": 3.5141642407979535e-05,
+ "loss": 0.4072,
+ "step": 138150
+ },
+ {
+ "epoch": 0.7130290319418432,
+ "grad_norm": 25993.587890625,
+ "learning_rate": 3.5106766461618083e-05,
+ "loss": 0.4066,
+ "step": 138200
+ },
+ {
+ "epoch": 0.713287001924456,
+ "grad_norm": 23321.55859375,
+ "learning_rate": 3.50718984649763e-05,
+ "loss": 0.4104,
+ "step": 138250
+ },
+ {
+ "epoch": 0.7135449719070689,
+ "grad_norm": 22022.267578125,
+ "learning_rate": 3.503703843666605e-05,
+ "loss": 0.4096,
+ "step": 138300
+ },
+ {
+ "epoch": 0.7138029418896817,
+ "grad_norm": 22249.640625,
+ "learning_rate": 3.500218639529493e-05,
+ "loss": 0.4121,
+ "step": 138350
+ },
+ {
+ "epoch": 0.7140609118722946,
+ "grad_norm": 21145.283203125,
+ "learning_rate": 3.496734235946632e-05,
+ "loss": 0.4126,
+ "step": 138400
+ },
+ {
+ "epoch": 0.7143188818549073,
+ "grad_norm": 22439.38671875,
+ "learning_rate": 3.493250634777924e-05,
+ "loss": 0.4076,
+ "step": 138450
+ },
+ {
+ "epoch": 0.7145768518375202,
+ "grad_norm": 25641.93359375,
+ "learning_rate": 3.4897678378828516e-05,
+ "loss": 0.4105,
+ "step": 138500
+ },
+ {
+ "epoch": 0.714834821820133,
+ "grad_norm": 22200.46875,
+ "learning_rate": 3.486285847120465e-05,
+ "loss": 0.4097,
+ "step": 138550
+ },
+ {
+ "epoch": 0.7150927918027459,
+ "grad_norm": 22691.666015625,
+ "learning_rate": 3.482804664349381e-05,
+ "loss": 0.4154,
+ "step": 138600
+ },
+ {
+ "epoch": 0.7153507617853586,
+ "grad_norm": 22139.16796875,
+ "learning_rate": 3.479324291427788e-05,
+ "loss": 0.4124,
+ "step": 138650
+ },
+ {
+ "epoch": 0.7156087317679715,
+ "grad_norm": 23695.7578125,
+ "learning_rate": 3.4758447302134414e-05,
+ "loss": 0.4174,
+ "step": 138700
+ },
+ {
+ "epoch": 0.7158667017505843,
+ "grad_norm": 24720.06640625,
+ "learning_rate": 3.472365982563666e-05,
+ "loss": 0.4095,
+ "step": 138750
+ },
+ {
+ "epoch": 0.7161246717331972,
+ "grad_norm": 22861.171875,
+ "learning_rate": 3.4688880503353474e-05,
+ "loss": 0.4039,
+ "step": 138800
+ },
+ {
+ "epoch": 0.7163826417158099,
+ "grad_norm": 22751.833984375,
+ "learning_rate": 3.465410935384939e-05,
+ "loss": 0.4175,
+ "step": 138850
+ },
+ {
+ "epoch": 0.7166406116984227,
+ "grad_norm": 22689.5,
+ "learning_rate": 3.461934639568457e-05,
+ "loss": 0.4133,
+ "step": 138900
+ },
+ {
+ "epoch": 0.7168985816810356,
+ "grad_norm": 23292.1328125,
+ "learning_rate": 3.458459164741482e-05,
+ "loss": 0.4062,
+ "step": 138950
+ },
+ {
+ "epoch": 0.7171565516636484,
+ "grad_norm": 22390.515625,
+ "learning_rate": 3.4549845127591563e-05,
+ "loss": 0.4169,
+ "step": 139000
+ },
+ {
+ "epoch": 0.7174145216462613,
+ "grad_norm": 23531.9921875,
+ "learning_rate": 3.451510685476178e-05,
+ "loss": 0.4084,
+ "step": 139050
+ },
+ {
+ "epoch": 0.717672491628874,
+ "grad_norm": 23847.154296875,
+ "learning_rate": 3.448037684746812e-05,
+ "loss": 0.4134,
+ "step": 139100
+ },
+ {
+ "epoch": 0.7179304616114869,
+ "grad_norm": 22651.15234375,
+ "learning_rate": 3.4445655124248774e-05,
+ "loss": 0.4118,
+ "step": 139150
+ },
+ {
+ "epoch": 0.7181884315940997,
+ "grad_norm": 21893.123046875,
+ "learning_rate": 3.441094170363755e-05,
+ "loss": 0.4065,
+ "step": 139200
+ },
+ {
+ "epoch": 0.7184464015767126,
+ "grad_norm": 22238.685546875,
+ "learning_rate": 3.4376236604163756e-05,
+ "loss": 0.4164,
+ "step": 139250
+ },
+ {
+ "epoch": 0.7187043715593253,
+ "grad_norm": 25605.083984375,
+ "learning_rate": 3.434153984435234e-05,
+ "loss": 0.4105,
+ "step": 139300
+ },
+ {
+ "epoch": 0.7189623415419382,
+ "grad_norm": 22414.0703125,
+ "learning_rate": 3.430685144272374e-05,
+ "loss": 0.4095,
+ "step": 139350
+ },
+ {
+ "epoch": 0.719220311524551,
+ "grad_norm": 22067.443359375,
+ "learning_rate": 3.4272171417793954e-05,
+ "loss": 0.4105,
+ "step": 139400
+ },
+ {
+ "epoch": 0.7194782815071639,
+ "grad_norm": 22398.36328125,
+ "learning_rate": 3.423749978807454e-05,
+ "loss": 0.4065,
+ "step": 139450
+ },
+ {
+ "epoch": 0.7197362514897766,
+ "grad_norm": 25660.017578125,
+ "learning_rate": 3.420283657207248e-05,
+ "loss": 0.4139,
+ "step": 139500
+ },
+ {
+ "epoch": 0.7199942214723895,
+ "grad_norm": 27245.4609375,
+ "learning_rate": 3.416818178829039e-05,
+ "loss": 0.4106,
+ "step": 139550
+ },
+ {
+ "epoch": 0.7202521914550023,
+ "grad_norm": 22430.6484375,
+ "learning_rate": 3.413353545522628e-05,
+ "loss": 0.4103,
+ "step": 139600
+ },
+ {
+ "epoch": 0.7205101614376151,
+ "grad_norm": 25269.876953125,
+ "learning_rate": 3.409889759137373e-05,
+ "loss": 0.4073,
+ "step": 139650
+ },
+ {
+ "epoch": 0.720768131420228,
+ "grad_norm": 22811.275390625,
+ "learning_rate": 3.406426821522172e-05,
+ "loss": 0.4156,
+ "step": 139700
+ },
+ {
+ "epoch": 0.7210261014028407,
+ "grad_norm": 21838.966796875,
+ "learning_rate": 3.402964734525477e-05,
+ "loss": 0.4132,
+ "step": 139750
+ },
+ {
+ "epoch": 0.7212840713854536,
+ "grad_norm": 22130.935546875,
+ "learning_rate": 3.39950349999528e-05,
+ "loss": 0.418,
+ "step": 139800
+ },
+ {
+ "epoch": 0.7215420413680664,
+ "grad_norm": 22744.779296875,
+ "learning_rate": 3.396043119779123e-05,
+ "loss": 0.4098,
+ "step": 139850
+ },
+ {
+ "epoch": 0.7218000113506793,
+ "grad_norm": 22559.07421875,
+ "learning_rate": 3.392583595724093e-05,
+ "loss": 0.4159,
+ "step": 139900
+ },
+ {
+ "epoch": 0.722057981333292,
+ "grad_norm": 20920.349609375,
+ "learning_rate": 3.3891249296768116e-05,
+ "loss": 0.406,
+ "step": 139950
+ },
+ {
+ "epoch": 0.7223159513159049,
+ "grad_norm": 20708.716796875,
+ "learning_rate": 3.38566712348345e-05,
+ "loss": 0.4102,
+ "step": 140000
+ },
+ {
+ "epoch": 0.7223159513159049,
+ "eval_loss": 0.39852654933929443,
+ "eval_runtime": 3128.1309,
+ "eval_samples_per_second": 991.365,
+ "eval_steps_per_second": 1.936,
+ "step": 140000
+ },
+ {
+ "epoch": 0.7225739212985177,
+ "grad_norm": 24440.734375,
+ "learning_rate": 3.382210178989718e-05,
+ "loss": 0.4144,
+ "step": 140050
+ },
+ {
+ "epoch": 0.7228318912811306,
+ "grad_norm": 22715.88671875,
+ "learning_rate": 3.378754098040867e-05,
+ "loss": 0.4146,
+ "step": 140100
+ },
+ {
+ "epoch": 0.7230898612637433,
+ "grad_norm": 23713.474609375,
+ "learning_rate": 3.375298882481683e-05,
+ "loss": 0.4089,
+ "step": 140150
+ },
+ {
+ "epoch": 0.7233478312463562,
+ "grad_norm": 24705.048828125,
+ "learning_rate": 3.371844534156497e-05,
+ "loss": 0.4052,
+ "step": 140200
+ },
+ {
+ "epoch": 0.723605801228969,
+ "grad_norm": 22624.98046875,
+ "learning_rate": 3.368391054909169e-05,
+ "loss": 0.4155,
+ "step": 140250
+ },
+ {
+ "epoch": 0.7238637712115819,
+ "grad_norm": 24774.72265625,
+ "learning_rate": 3.364938446583103e-05,
+ "loss": 0.4058,
+ "step": 140300
+ },
+ {
+ "epoch": 0.7241217411941946,
+ "grad_norm": 24109.02734375,
+ "learning_rate": 3.361486711021235e-05,
+ "loss": 0.4169,
+ "step": 140350
+ },
+ {
+ "epoch": 0.7243797111768074,
+ "grad_norm": 20315.724609375,
+ "learning_rate": 3.3580358500660284e-05,
+ "loss": 0.4135,
+ "step": 140400
+ },
+ {
+ "epoch": 0.7246376811594203,
+ "grad_norm": 26642.84765625,
+ "learning_rate": 3.3545858655594935e-05,
+ "loss": 0.4182,
+ "step": 140450
+ },
+ {
+ "epoch": 0.7248956511420331,
+ "grad_norm": 23466.93359375,
+ "learning_rate": 3.351136759343161e-05,
+ "loss": 0.4098,
+ "step": 140500
+ },
+ {
+ "epoch": 0.725153621124646,
+ "grad_norm": 25247.11328125,
+ "learning_rate": 3.3476885332580985e-05,
+ "loss": 0.4085,
+ "step": 140550
+ },
+ {
+ "epoch": 0.7254115911072587,
+ "grad_norm": 25220.11328125,
+ "learning_rate": 3.3442411891449e-05,
+ "loss": 0.4139,
+ "step": 140600
+ },
+ {
+ "epoch": 0.7256695610898716,
+ "grad_norm": 21836.095703125,
+ "learning_rate": 3.3407947288436936e-05,
+ "loss": 0.4127,
+ "step": 140650
+ },
+ {
+ "epoch": 0.7259275310724844,
+ "grad_norm": 22301.443359375,
+ "learning_rate": 3.3373491541941346e-05,
+ "loss": 0.4127,
+ "step": 140700
+ },
+ {
+ "epoch": 0.7261855010550973,
+ "grad_norm": 21902.615234375,
+ "learning_rate": 3.333904467035399e-05,
+ "loss": 0.4111,
+ "step": 140750
+ },
+ {
+ "epoch": 0.72644347103771,
+ "grad_norm": 21408.71484375,
+ "learning_rate": 3.3304606692061984e-05,
+ "loss": 0.4095,
+ "step": 140800
+ },
+ {
+ "epoch": 0.7267014410203229,
+ "grad_norm": 26146.03515625,
+ "learning_rate": 3.3270177625447626e-05,
+ "loss": 0.4096,
+ "step": 140850
+ },
+ {
+ "epoch": 0.7269594110029357,
+ "grad_norm": 22772.9921875,
+ "learning_rate": 3.323575748888852e-05,
+ "loss": 0.4109,
+ "step": 140900
+ },
+ {
+ "epoch": 0.7272173809855486,
+ "grad_norm": 24654.810546875,
+ "learning_rate": 3.320134630075742e-05,
+ "loss": 0.4135,
+ "step": 140950
+ },
+ {
+ "epoch": 0.7274753509681613,
+ "grad_norm": 23458.103515625,
+ "learning_rate": 3.31669440794224e-05,
+ "loss": 0.4128,
+ "step": 141000
+ },
+ {
+ "epoch": 0.7277333209507741,
+ "grad_norm": 22455.630859375,
+ "learning_rate": 3.3132550843246654e-05,
+ "loss": 0.411,
+ "step": 141050
+ },
+ {
+ "epoch": 0.727991290933387,
+ "grad_norm": 22372.08203125,
+ "learning_rate": 3.3098166610588655e-05,
+ "loss": 0.413,
+ "step": 141100
+ },
+ {
+ "epoch": 0.7282492609159998,
+ "grad_norm": 22878.216796875,
+ "learning_rate": 3.306379139980206e-05,
+ "loss": 0.4054,
+ "step": 141150
+ },
+ {
+ "epoch": 0.7285072308986127,
+ "grad_norm": 22959.708984375,
+ "learning_rate": 3.302942522923563e-05,
+ "loss": 0.4114,
+ "step": 141200
+ },
+ {
+ "epoch": 0.7287652008812254,
+ "grad_norm": 22574.986328125,
+ "learning_rate": 3.2995068117233417e-05,
+ "loss": 0.4105,
+ "step": 141250
+ },
+ {
+ "epoch": 0.7290231708638383,
+ "grad_norm": 23770.279296875,
+ "learning_rate": 3.2960720082134555e-05,
+ "loss": 0.4091,
+ "step": 141300
+ },
+ {
+ "epoch": 0.7292811408464511,
+ "grad_norm": 23017.416015625,
+ "learning_rate": 3.292638114227338e-05,
+ "loss": 0.411,
+ "step": 141350
+ },
+ {
+ "epoch": 0.729539110829064,
+ "grad_norm": 23605.982421875,
+ "learning_rate": 3.289205131597932e-05,
+ "loss": 0.4097,
+ "step": 141400
+ },
+ {
+ "epoch": 0.7297970808116767,
+ "grad_norm": 22409.12890625,
+ "learning_rate": 3.2857730621577006e-05,
+ "loss": 0.4096,
+ "step": 141450
+ },
+ {
+ "epoch": 0.7300550507942896,
+ "grad_norm": 22681.11328125,
+ "learning_rate": 3.282341907738613e-05,
+ "loss": 0.4066,
+ "step": 141500
+ },
+ {
+ "epoch": 0.7303130207769024,
+ "grad_norm": 27188.859375,
+ "learning_rate": 3.278911670172154e-05,
+ "loss": 0.4104,
+ "step": 141550
+ },
+ {
+ "epoch": 0.7305709907595153,
+ "grad_norm": 25134.85546875,
+ "learning_rate": 3.2754823512893225e-05,
+ "loss": 0.4105,
+ "step": 141600
+ },
+ {
+ "epoch": 0.730828960742128,
+ "grad_norm": 21408.478515625,
+ "learning_rate": 3.2720539529206154e-05,
+ "loss": 0.412,
+ "step": 141650
+ },
+ {
+ "epoch": 0.7310869307247408,
+ "grad_norm": 21062.59375,
+ "learning_rate": 3.26862647689605e-05,
+ "loss": 0.411,
+ "step": 141700
+ },
+ {
+ "epoch": 0.7313449007073537,
+ "grad_norm": 21591.23828125,
+ "learning_rate": 3.265199925045143e-05,
+ "loss": 0.4171,
+ "step": 141750
+ },
+ {
+ "epoch": 0.7316028706899665,
+ "grad_norm": 23328.751953125,
+ "learning_rate": 3.261774299196926e-05,
+ "loss": 0.4127,
+ "step": 141800
+ },
+ {
+ "epoch": 0.7318608406725794,
+ "grad_norm": 27247.59375,
+ "learning_rate": 3.258349601179928e-05,
+ "loss": 0.4087,
+ "step": 141850
+ },
+ {
+ "epoch": 0.7321188106551921,
+ "grad_norm": 24500.822265625,
+ "learning_rate": 3.254925832822188e-05,
+ "loss": 0.4015,
+ "step": 141900
+ },
+ {
+ "epoch": 0.732376780637805,
+ "grad_norm": 25855.849609375,
+ "learning_rate": 3.251502995951247e-05,
+ "loss": 0.4125,
+ "step": 141950
+ },
+ {
+ "epoch": 0.7326347506204178,
+ "grad_norm": 23075.234375,
+ "learning_rate": 3.248081092394148e-05,
+ "loss": 0.4112,
+ "step": 142000
+ },
+ {
+ "epoch": 0.7328927206030307,
+ "grad_norm": 25166.712890625,
+ "learning_rate": 3.2446601239774405e-05,
+ "loss": 0.4121,
+ "step": 142050
+ },
+ {
+ "epoch": 0.7331506905856434,
+ "grad_norm": 23327.337890625,
+ "learning_rate": 3.241240092527167e-05,
+ "loss": 0.41,
+ "step": 142100
+ },
+ {
+ "epoch": 0.7334086605682563,
+ "grad_norm": 34138.34375,
+ "learning_rate": 3.237820999868876e-05,
+ "loss": 0.413,
+ "step": 142150
+ },
+ {
+ "epoch": 0.7336666305508691,
+ "grad_norm": 23031.2109375,
+ "learning_rate": 3.234402847827612e-05,
+ "loss": 0.414,
+ "step": 142200
+ },
+ {
+ "epoch": 0.733924600533482,
+ "grad_norm": 23237.44921875,
+ "learning_rate": 3.230985638227921e-05,
+ "loss": 0.4159,
+ "step": 142250
+ },
+ {
+ "epoch": 0.7341825705160947,
+ "grad_norm": 21437.705078125,
+ "learning_rate": 3.2275693728938395e-05,
+ "loss": 0.4078,
+ "step": 142300
+ },
+ {
+ "epoch": 0.7344405404987076,
+ "grad_norm": 23815.9140625,
+ "learning_rate": 3.224154053648906e-05,
+ "loss": 0.4135,
+ "step": 142350
+ },
+ {
+ "epoch": 0.7346985104813204,
+ "grad_norm": 26809.724609375,
+ "learning_rate": 3.2207396823161514e-05,
+ "loss": 0.409,
+ "step": 142400
+ },
+ {
+ "epoch": 0.7349564804639332,
+ "grad_norm": 21905.6484375,
+ "learning_rate": 3.2173262607181e-05,
+ "loss": 0.41,
+ "step": 142450
+ },
+ {
+ "epoch": 0.735214450446546,
+ "grad_norm": 23628.076171875,
+ "learning_rate": 3.2139137906767743e-05,
+ "loss": 0.4175,
+ "step": 142500
+ },
+ {
+ "epoch": 0.7354724204291588,
+ "grad_norm": 24156.837890625,
+ "learning_rate": 3.210502274013679e-05,
+ "loss": 0.4114,
+ "step": 142550
+ },
+ {
+ "epoch": 0.7357303904117717,
+ "grad_norm": 21517.404296875,
+ "learning_rate": 3.207091712549819e-05,
+ "loss": 0.4112,
+ "step": 142600
+ },
+ {
+ "epoch": 0.7359883603943845,
+ "grad_norm": 22684.734375,
+ "learning_rate": 3.203682108105685e-05,
+ "loss": 0.417,
+ "step": 142650
+ },
+ {
+ "epoch": 0.7362463303769974,
+ "grad_norm": 22205.361328125,
+ "learning_rate": 3.2002734625012585e-05,
+ "loss": 0.4111,
+ "step": 142700
+ },
+ {
+ "epoch": 0.7365043003596101,
+ "grad_norm": 21131.41796875,
+ "learning_rate": 3.196865777556008e-05,
+ "loss": 0.4114,
+ "step": 142750
+ },
+ {
+ "epoch": 0.736762270342223,
+ "grad_norm": 23506.66796875,
+ "learning_rate": 3.1934590550888894e-05,
+ "loss": 0.4183,
+ "step": 142800
+ },
+ {
+ "epoch": 0.7370202403248358,
+ "grad_norm": 24435.4609375,
+ "learning_rate": 3.190053296918345e-05,
+ "loss": 0.4166,
+ "step": 142850
+ },
+ {
+ "epoch": 0.7372782103074487,
+ "grad_norm": 22610.4296875,
+ "learning_rate": 3.186648504862303e-05,
+ "loss": 0.4109,
+ "step": 142900
+ },
+ {
+ "epoch": 0.7375361802900614,
+ "grad_norm": 24722.974609375,
+ "learning_rate": 3.183244680738178e-05,
+ "loss": 0.4093,
+ "step": 142950
+ },
+ {
+ "epoch": 0.7377941502726743,
+ "grad_norm": 23927.6640625,
+ "learning_rate": 3.1798418263628596e-05,
+ "loss": 0.4106,
+ "step": 143000
+ },
+ {
+ "epoch": 0.7380521202552871,
+ "grad_norm": 23958.216796875,
+ "learning_rate": 3.176439943552732e-05,
+ "loss": 0.4067,
+ "step": 143050
+ },
+ {
+ "epoch": 0.7383100902379,
+ "grad_norm": 23272.818359375,
+ "learning_rate": 3.1730390341236496e-05,
+ "loss": 0.4086,
+ "step": 143100
+ },
+ {
+ "epoch": 0.7385680602205127,
+ "grad_norm": 20998.751953125,
+ "learning_rate": 3.1696390998909556e-05,
+ "loss": 0.4099,
+ "step": 143150
+ },
+ {
+ "epoch": 0.7388260302031255,
+ "grad_norm": 24493.677734375,
+ "learning_rate": 3.166240142669464e-05,
+ "loss": 0.413,
+ "step": 143200
+ },
+ {
+ "epoch": 0.7390840001857384,
+ "grad_norm": 22639.927734375,
+ "learning_rate": 3.162842164273479e-05,
+ "loss": 0.4105,
+ "step": 143250
+ },
+ {
+ "epoch": 0.7393419701683512,
+ "grad_norm": 24407.029296875,
+ "learning_rate": 3.15944516651677e-05,
+ "loss": 0.4188,
+ "step": 143300
+ },
+ {
+ "epoch": 0.7395999401509641,
+ "grad_norm": 26538.68359375,
+ "learning_rate": 3.156049151212591e-05,
+ "loss": 0.4135,
+ "step": 143350
+ },
+ {
+ "epoch": 0.7398579101335768,
+ "grad_norm": 24519.060546875,
+ "learning_rate": 3.1526541201736695e-05,
+ "loss": 0.4141,
+ "step": 143400
+ },
+ {
+ "epoch": 0.7401158801161897,
+ "grad_norm": 21236.681640625,
+ "learning_rate": 3.149260075212206e-05,
+ "loss": 0.4096,
+ "step": 143450
+ },
+ {
+ "epoch": 0.7403738500988025,
+ "grad_norm": 24463.015625,
+ "learning_rate": 3.1458670181398796e-05,
+ "loss": 0.4035,
+ "step": 143500
+ },
+ {
+ "epoch": 0.7406318200814154,
+ "grad_norm": 26984.408203125,
+ "learning_rate": 3.1424749507678336e-05,
+ "loss": 0.4115,
+ "step": 143550
+ },
+ {
+ "epoch": 0.7408897900640281,
+ "grad_norm": 25619.35546875,
+ "learning_rate": 3.139083874906691e-05,
+ "loss": 0.4131,
+ "step": 143600
+ },
+ {
+ "epoch": 0.741147760046641,
+ "grad_norm": 24277.7890625,
+ "learning_rate": 3.13569379236654e-05,
+ "loss": 0.3994,
+ "step": 143650
+ },
+ {
+ "epoch": 0.7414057300292538,
+ "grad_norm": 24007.654296875,
+ "learning_rate": 3.1323047049569446e-05,
+ "loss": 0.4091,
+ "step": 143700
+ },
+ {
+ "epoch": 0.7416637000118667,
+ "grad_norm": 21688.703125,
+ "learning_rate": 3.12891661448693e-05,
+ "loss": 0.4176,
+ "step": 143750
+ },
+ {
+ "epoch": 0.7419216699944794,
+ "grad_norm": 22735.900390625,
+ "learning_rate": 3.125529522764995e-05,
+ "loss": 0.4091,
+ "step": 143800
+ },
+ {
+ "epoch": 0.7421796399770922,
+ "grad_norm": 23359.259765625,
+ "learning_rate": 3.122143431599105e-05,
+ "loss": 0.4125,
+ "step": 143850
+ },
+ {
+ "epoch": 0.7424376099597051,
+ "grad_norm": 26637.263671875,
+ "learning_rate": 3.118758342796687e-05,
+ "loss": 0.4137,
+ "step": 143900
+ },
+ {
+ "epoch": 0.7426955799423179,
+ "grad_norm": 24977.3984375,
+ "learning_rate": 3.1153742581646406e-05,
+ "loss": 0.4094,
+ "step": 143950
+ },
+ {
+ "epoch": 0.7429535499249308,
+ "grad_norm": 25850.91796875,
+ "learning_rate": 3.111991179509318e-05,
+ "loss": 0.4092,
+ "step": 144000
+ },
+ {
+ "epoch": 0.7432115199075435,
+ "grad_norm": 22823.0625,
+ "learning_rate": 3.1086091086365474e-05,
+ "loss": 0.4111,
+ "step": 144050
+ },
+ {
+ "epoch": 0.7434694898901564,
+ "grad_norm": 24187.640625,
+ "learning_rate": 3.1052280473516076e-05,
+ "loss": 0.414,
+ "step": 144100
+ },
+ {
+ "epoch": 0.7437274598727692,
+ "grad_norm": 21726.537109375,
+ "learning_rate": 3.101847997459249e-05,
+ "loss": 0.4098,
+ "step": 144150
+ },
+ {
+ "epoch": 0.7439854298553821,
+ "grad_norm": 23075.27734375,
+ "learning_rate": 3.098468960763671e-05,
+ "loss": 0.4084,
+ "step": 144200
+ },
+ {
+ "epoch": 0.7442433998379948,
+ "grad_norm": 24207.529296875,
+ "learning_rate": 3.095090939068541e-05,
+ "loss": 0.4156,
+ "step": 144250
+ },
+ {
+ "epoch": 0.7445013698206077,
+ "grad_norm": 25209.39453125,
+ "learning_rate": 3.091713934176982e-05,
+ "loss": 0.4122,
+ "step": 144300
+ },
+ {
+ "epoch": 0.7447593398032205,
+ "grad_norm": 24308.361328125,
+ "learning_rate": 3.088337947891573e-05,
+ "loss": 0.408,
+ "step": 144350
+ },
+ {
+ "epoch": 0.7450173097858334,
+ "grad_norm": 22416.990234375,
+ "learning_rate": 3.0849629820143517e-05,
+ "loss": 0.4136,
+ "step": 144400
+ },
+ {
+ "epoch": 0.7452752797684461,
+ "grad_norm": 24417.943359375,
+ "learning_rate": 3.081589038346806e-05,
+ "loss": 0.4079,
+ "step": 144450
+ },
+ {
+ "epoch": 0.745533249751059,
+ "grad_norm": 21519.650390625,
+ "learning_rate": 3.078216118689885e-05,
+ "loss": 0.4073,
+ "step": 144500
+ },
+ {
+ "epoch": 0.7457912197336718,
+ "grad_norm": 22821.1796875,
+ "learning_rate": 3.074844224843986e-05,
+ "loss": 0.4058,
+ "step": 144550
+ },
+ {
+ "epoch": 0.7460491897162846,
+ "grad_norm": 22559.86328125,
+ "learning_rate": 3.071473358608963e-05,
+ "loss": 0.413,
+ "step": 144600
+ },
+ {
+ "epoch": 0.7463071596988974,
+ "grad_norm": 25803.658203125,
+ "learning_rate": 3.068103521784115e-05,
+ "loss": 0.4077,
+ "step": 144650
+ },
+ {
+ "epoch": 0.7465651296815102,
+ "grad_norm": 27601.787109375,
+ "learning_rate": 3.0647347161681983e-05,
+ "loss": 0.4057,
+ "step": 144700
+ },
+ {
+ "epoch": 0.7468230996641231,
+ "grad_norm": 21363.67578125,
+ "learning_rate": 3.061366943559417e-05,
+ "loss": 0.4082,
+ "step": 144750
+ },
+ {
+ "epoch": 0.7470810696467359,
+ "grad_norm": 24007.3046875,
+ "learning_rate": 3.058000205755421e-05,
+ "loss": 0.4121,
+ "step": 144800
+ },
+ {
+ "epoch": 0.7473390396293488,
+ "grad_norm": 29940.8125,
+ "learning_rate": 3.054634504553312e-05,
+ "loss": 0.4046,
+ "step": 144850
+ },
+ {
+ "epoch": 0.7475970096119615,
+ "grad_norm": 24161.90234375,
+ "learning_rate": 3.0512698417496334e-05,
+ "loss": 0.4108,
+ "step": 144900
+ },
+ {
+ "epoch": 0.7478549795945744,
+ "grad_norm": 22984.619140625,
+ "learning_rate": 3.0479062191403785e-05,
+ "loss": 0.4158,
+ "step": 144950
+ },
+ {
+ "epoch": 0.7481129495771872,
+ "grad_norm": 26418.95703125,
+ "learning_rate": 3.0445436385209836e-05,
+ "loss": 0.4059,
+ "step": 145000
+ },
+ {
+ "epoch": 0.7481129495771872,
+ "eval_loss": 0.3971329629421234,
+ "eval_runtime": 3201.285,
+ "eval_samples_per_second": 968.711,
+ "eval_steps_per_second": 1.892,
+ "step": 145000
+ },
+ {
+ "epoch": 0.7483709195598001,
+ "grad_norm": 22503.525390625,
+ "learning_rate": 3.0411821016863308e-05,
+ "loss": 0.4048,
+ "step": 145050
+ },
+ {
+ "epoch": 0.7486288895424128,
+ "grad_norm": 23114.525390625,
+ "learning_rate": 3.03782161043074e-05,
+ "loss": 0.4111,
+ "step": 145100
+ },
+ {
+ "epoch": 0.7488868595250256,
+ "grad_norm": 23249.794921875,
+ "learning_rate": 3.0344621665479778e-05,
+ "loss": 0.4093,
+ "step": 145150
+ },
+ {
+ "epoch": 0.7491448295076385,
+ "grad_norm": 23568.833984375,
+ "learning_rate": 3.0311037718312518e-05,
+ "loss": 0.4166,
+ "step": 145200
+ },
+ {
+ "epoch": 0.7494027994902513,
+ "grad_norm": 21794.6015625,
+ "learning_rate": 3.027746428073206e-05,
+ "loss": 0.4122,
+ "step": 145250
+ },
+ {
+ "epoch": 0.7496607694728641,
+ "grad_norm": 23710.212890625,
+ "learning_rate": 3.024390137065929e-05,
+ "loss": 0.4066,
+ "step": 145300
+ },
+ {
+ "epoch": 0.7499187394554769,
+ "grad_norm": 23179.240234375,
+ "learning_rate": 3.0210349006009385e-05,
+ "loss": 0.4127,
+ "step": 145350
+ },
+ {
+ "epoch": 0.7501767094380898,
+ "grad_norm": 25111.078125,
+ "learning_rate": 3.017680720469199e-05,
+ "loss": 0.4128,
+ "step": 145400
+ },
+ {
+ "epoch": 0.7504346794207026,
+ "grad_norm": 24289.095703125,
+ "learning_rate": 3.0143275984611042e-05,
+ "loss": 0.4167,
+ "step": 145450
+ },
+ {
+ "epoch": 0.7506926494033155,
+ "grad_norm": 22695.802734375,
+ "learning_rate": 3.0109755363664893e-05,
+ "loss": 0.4135,
+ "step": 145500
+ },
+ {
+ "epoch": 0.7509506193859282,
+ "grad_norm": 26995.833984375,
+ "learning_rate": 3.0076245359746163e-05,
+ "loss": 0.4057,
+ "step": 145550
+ },
+ {
+ "epoch": 0.7512085893685411,
+ "grad_norm": 21887.568359375,
+ "learning_rate": 3.004274599074185e-05,
+ "loss": 0.4089,
+ "step": 145600
+ },
+ {
+ "epoch": 0.7514665593511539,
+ "grad_norm": 27344.78125,
+ "learning_rate": 3.00092572745333e-05,
+ "loss": 0.4156,
+ "step": 145650
+ },
+ {
+ "epoch": 0.7517245293337668,
+ "grad_norm": 25476.15234375,
+ "learning_rate": 2.9975779228996104e-05,
+ "loss": 0.4113,
+ "step": 145700
+ },
+ {
+ "epoch": 0.7519824993163795,
+ "grad_norm": 24602.64453125,
+ "learning_rate": 2.9942311872000215e-05,
+ "loss": 0.4077,
+ "step": 145750
+ },
+ {
+ "epoch": 0.7522404692989924,
+ "grad_norm": 24124.984375,
+ "learning_rate": 2.990885522140985e-05,
+ "loss": 0.4122,
+ "step": 145800
+ },
+ {
+ "epoch": 0.7524984392816052,
+ "grad_norm": 24945.2109375,
+ "learning_rate": 2.987540929508354e-05,
+ "loss": 0.409,
+ "step": 145850
+ },
+ {
+ "epoch": 0.752756409264218,
+ "grad_norm": 26535.109375,
+ "learning_rate": 2.9841974110874037e-05,
+ "loss": 0.4132,
+ "step": 145900
+ },
+ {
+ "epoch": 0.7530143792468308,
+ "grad_norm": 21566.904296875,
+ "learning_rate": 2.980854968662843e-05,
+ "loss": 0.4073,
+ "step": 145950
+ },
+ {
+ "epoch": 0.7532723492294436,
+ "grad_norm": 22965.73828125,
+ "learning_rate": 2.9775136040188007e-05,
+ "loss": 0.4124,
+ "step": 146000
+ },
+ {
+ "epoch": 0.7535303192120565,
+ "grad_norm": 25307.123046875,
+ "learning_rate": 2.974173318938833e-05,
+ "loss": 0.4134,
+ "step": 146050
+ },
+ {
+ "epoch": 0.7537882891946693,
+ "grad_norm": 22280.431640625,
+ "learning_rate": 2.9708341152059226e-05,
+ "loss": 0.4085,
+ "step": 146100
+ },
+ {
+ "epoch": 0.7540462591772822,
+ "grad_norm": 25268.705078125,
+ "learning_rate": 2.9674959946024662e-05,
+ "loss": 0.4031,
+ "step": 146150
+ },
+ {
+ "epoch": 0.7543042291598949,
+ "grad_norm": 20014.28125,
+ "learning_rate": 2.9641589589102918e-05,
+ "loss": 0.4093,
+ "step": 146200
+ },
+ {
+ "epoch": 0.7545621991425078,
+ "grad_norm": 28430.544921875,
+ "learning_rate": 2.9608230099106427e-05,
+ "loss": 0.4112,
+ "step": 146250
+ },
+ {
+ "epoch": 0.7548201691251206,
+ "grad_norm": 21031.328125,
+ "learning_rate": 2.9574881493841867e-05,
+ "loss": 0.4084,
+ "step": 146300
+ },
+ {
+ "epoch": 0.7550781391077335,
+ "grad_norm": 24118.916015625,
+ "learning_rate": 2.9541543791110032e-05,
+ "loss": 0.4152,
+ "step": 146350
+ },
+ {
+ "epoch": 0.7553361090903462,
+ "grad_norm": 20663.740234375,
+ "learning_rate": 2.950821700870598e-05,
+ "loss": 0.409,
+ "step": 146400
+ },
+ {
+ "epoch": 0.7555940790729591,
+ "grad_norm": 23081.328125,
+ "learning_rate": 2.9474901164418877e-05,
+ "loss": 0.4089,
+ "step": 146450
+ },
+ {
+ "epoch": 0.7558520490555719,
+ "grad_norm": 24167.80859375,
+ "learning_rate": 2.9441596276032085e-05,
+ "loss": 0.4096,
+ "step": 146500
+ },
+ {
+ "epoch": 0.7561100190381848,
+ "grad_norm": 24959.595703125,
+ "learning_rate": 2.940830236132313e-05,
+ "loss": 0.4109,
+ "step": 146550
+ },
+ {
+ "epoch": 0.7563679890207975,
+ "grad_norm": 22731.36328125,
+ "learning_rate": 2.9375019438063622e-05,
+ "loss": 0.41,
+ "step": 146600
+ },
+ {
+ "epoch": 0.7566259590034103,
+ "grad_norm": 24127.41015625,
+ "learning_rate": 2.9341747524019368e-05,
+ "loss": 0.4078,
+ "step": 146650
+ },
+ {
+ "epoch": 0.7568839289860232,
+ "grad_norm": 27476.791015625,
+ "learning_rate": 2.9308486636950254e-05,
+ "loss": 0.4063,
+ "step": 146700
+ },
+ {
+ "epoch": 0.757141898968636,
+ "grad_norm": 24664.61328125,
+ "learning_rate": 2.9275236794610328e-05,
+ "loss": 0.4086,
+ "step": 146750
+ },
+ {
+ "epoch": 0.7573998689512488,
+ "grad_norm": 23883.0625,
+ "learning_rate": 2.9241998014747664e-05,
+ "loss": 0.4046,
+ "step": 146800
+ },
+ {
+ "epoch": 0.7576578389338616,
+ "grad_norm": 23431.509765625,
+ "learning_rate": 2.9208770315104515e-05,
+ "loss": 0.4054,
+ "step": 146850
+ },
+ {
+ "epoch": 0.7579158089164745,
+ "grad_norm": 25177.9453125,
+ "learning_rate": 2.9175553713417176e-05,
+ "loss": 0.4094,
+ "step": 146900
+ },
+ {
+ "epoch": 0.7581737788990873,
+ "grad_norm": 22862.201171875,
+ "learning_rate": 2.9142348227416e-05,
+ "loss": 0.4073,
+ "step": 146950
+ },
+ {
+ "epoch": 0.7584317488817002,
+ "grad_norm": 21731.240234375,
+ "learning_rate": 2.9109153874825478e-05,
+ "loss": 0.4081,
+ "step": 147000
+ },
+ {
+ "epoch": 0.7586897188643129,
+ "grad_norm": 24952.87109375,
+ "learning_rate": 2.9075970673364083e-05,
+ "loss": 0.4092,
+ "step": 147050
+ },
+ {
+ "epoch": 0.7589476888469258,
+ "grad_norm": 23138.029296875,
+ "learning_rate": 2.9042798640744385e-05,
+ "loss": 0.4051,
+ "step": 147100
+ },
+ {
+ "epoch": 0.7592056588295386,
+ "grad_norm": 21496.501953125,
+ "learning_rate": 2.900963779467295e-05,
+ "loss": 0.4096,
+ "step": 147150
+ },
+ {
+ "epoch": 0.7594636288121515,
+ "grad_norm": 22243.36328125,
+ "learning_rate": 2.8976488152850406e-05,
+ "loss": 0.3985,
+ "step": 147200
+ },
+ {
+ "epoch": 0.7597215987947642,
+ "grad_norm": 24515.029296875,
+ "learning_rate": 2.894334973297137e-05,
+ "loss": 0.4043,
+ "step": 147250
+ },
+ {
+ "epoch": 0.759979568777377,
+ "grad_norm": 23431.802734375,
+ "learning_rate": 2.8910222552724553e-05,
+ "loss": 0.4092,
+ "step": 147300
+ },
+ {
+ "epoch": 0.7602375387599899,
+ "grad_norm": 24192.44140625,
+ "learning_rate": 2.8877106629792515e-05,
+ "loss": 0.413,
+ "step": 147350
+ },
+ {
+ "epoch": 0.7604955087426027,
+ "grad_norm": 24239.015625,
+ "learning_rate": 2.884400198185196e-05,
+ "loss": 0.4064,
+ "step": 147400
+ },
+ {
+ "epoch": 0.7607534787252155,
+ "grad_norm": 22407.27734375,
+ "learning_rate": 2.881090862657348e-05,
+ "loss": 0.4086,
+ "step": 147450
+ },
+ {
+ "epoch": 0.7610114487078283,
+ "grad_norm": 24915.517578125,
+ "learning_rate": 2.877782658162166e-05,
+ "loss": 0.4067,
+ "step": 147500
+ },
+ {
+ "epoch": 0.7612694186904412,
+ "grad_norm": 23721.33984375,
+ "learning_rate": 2.8744755864655098e-05,
+ "loss": 0.4078,
+ "step": 147550
+ },
+ {
+ "epoch": 0.761527388673054,
+ "grad_norm": 23041.625,
+ "learning_rate": 2.8711696493326233e-05,
+ "loss": 0.4092,
+ "step": 147600
+ },
+ {
+ "epoch": 0.7617853586556669,
+ "grad_norm": 24021.81640625,
+ "learning_rate": 2.867864848528158e-05,
+ "loss": 0.4116,
+ "step": 147650
+ },
+ {
+ "epoch": 0.7620433286382796,
+ "grad_norm": 21309.7890625,
+ "learning_rate": 2.8645611858161502e-05,
+ "loss": 0.414,
+ "step": 147700
+ },
+ {
+ "epoch": 0.7623012986208925,
+ "grad_norm": 21959.544921875,
+ "learning_rate": 2.8612586629600307e-05,
+ "loss": 0.4113,
+ "step": 147750
+ },
+ {
+ "epoch": 0.7625592686035053,
+ "grad_norm": 22090.75,
+ "learning_rate": 2.857957281722623e-05,
+ "loss": 0.41,
+ "step": 147800
+ },
+ {
+ "epoch": 0.7628172385861182,
+ "grad_norm": 21273.6640625,
+ "learning_rate": 2.854657043866138e-05,
+ "loss": 0.4043,
+ "step": 147850
+ },
+ {
+ "epoch": 0.7630752085687309,
+ "grad_norm": 22781.33984375,
+ "learning_rate": 2.8513579511521825e-05,
+ "loss": 0.4009,
+ "step": 147900
+ },
+ {
+ "epoch": 0.7633331785513437,
+ "grad_norm": 24383.95703125,
+ "learning_rate": 2.8480600053417472e-05,
+ "loss": 0.4077,
+ "step": 147950
+ },
+ {
+ "epoch": 0.7635911485339566,
+ "grad_norm": 23988.673828125,
+ "learning_rate": 2.8447632081952104e-05,
+ "loss": 0.4048,
+ "step": 148000
+ },
+ {
+ "epoch": 0.7638491185165694,
+ "grad_norm": 24593.1484375,
+ "learning_rate": 2.8414675614723397e-05,
+ "loss": 0.4145,
+ "step": 148050
+ },
+ {
+ "epoch": 0.7641070884991822,
+ "grad_norm": 25818.216796875,
+ "learning_rate": 2.838173066932287e-05,
+ "loss": 0.408,
+ "step": 148100
+ },
+ {
+ "epoch": 0.764365058481795,
+ "grad_norm": 25780.1796875,
+ "learning_rate": 2.8348797263335886e-05,
+ "loss": 0.4109,
+ "step": 148150
+ },
+ {
+ "epoch": 0.7646230284644079,
+ "grad_norm": 22835.51171875,
+ "learning_rate": 2.8315875414341687e-05,
+ "loss": 0.4037,
+ "step": 148200
+ },
+ {
+ "epoch": 0.7648809984470207,
+ "grad_norm": 22711.501953125,
+ "learning_rate": 2.8282965139913296e-05,
+ "loss": 0.4117,
+ "step": 148250
+ },
+ {
+ "epoch": 0.7651389684296336,
+ "grad_norm": 22654.080078125,
+ "learning_rate": 2.825006645761758e-05,
+ "loss": 0.4094,
+ "step": 148300
+ },
+ {
+ "epoch": 0.7653969384122463,
+ "grad_norm": 23406.8671875,
+ "learning_rate": 2.821717938501526e-05,
+ "loss": 0.4096,
+ "step": 148350
+ },
+ {
+ "epoch": 0.7656549083948592,
+ "grad_norm": 23591.68359375,
+ "learning_rate": 2.8184303939660745e-05,
+ "loss": 0.4087,
+ "step": 148400
+ },
+ {
+ "epoch": 0.765912878377472,
+ "grad_norm": 21550.94140625,
+ "learning_rate": 2.815144013910237e-05,
+ "loss": 0.4046,
+ "step": 148450
+ },
+ {
+ "epoch": 0.7661708483600849,
+ "grad_norm": 23503.48046875,
+ "learning_rate": 2.8118588000882177e-05,
+ "loss": 0.4116,
+ "step": 148500
+ },
+ {
+ "epoch": 0.7664288183426976,
+ "grad_norm": 25247.244140625,
+ "learning_rate": 2.8085747542536e-05,
+ "loss": 0.4023,
+ "step": 148550
+ },
+ {
+ "epoch": 0.7666867883253105,
+ "grad_norm": 23665.91796875,
+ "learning_rate": 2.805291878159344e-05,
+ "loss": 0.4117,
+ "step": 148600
+ },
+ {
+ "epoch": 0.7669447583079233,
+ "grad_norm": 22785.59765625,
+ "learning_rate": 2.8020101735577837e-05,
+ "loss": 0.4084,
+ "step": 148650
+ },
+ {
+ "epoch": 0.7672027282905362,
+ "grad_norm": 20447.72265625,
+ "learning_rate": 2.7987296422006327e-05,
+ "loss": 0.4091,
+ "step": 148700
+ },
+ {
+ "epoch": 0.7674606982731489,
+ "grad_norm": 24965.869140625,
+ "learning_rate": 2.795450285838974e-05,
+ "loss": 0.4067,
+ "step": 148750
+ },
+ {
+ "epoch": 0.7677186682557617,
+ "grad_norm": 24323.09765625,
+ "learning_rate": 2.7921721062232637e-05,
+ "loss": 0.4037,
+ "step": 148800
+ },
+ {
+ "epoch": 0.7679766382383746,
+ "grad_norm": 23956.177734375,
+ "learning_rate": 2.7888951051033314e-05,
+ "loss": 0.4079,
+ "step": 148850
+ },
+ {
+ "epoch": 0.7682346082209874,
+ "grad_norm": 24222.4140625,
+ "learning_rate": 2.7856192842283756e-05,
+ "loss": 0.4112,
+ "step": 148900
+ },
+ {
+ "epoch": 0.7684925782036002,
+ "grad_norm": 24444.046875,
+ "learning_rate": 2.782344645346966e-05,
+ "loss": 0.4148,
+ "step": 148950
+ },
+ {
+ "epoch": 0.768750548186213,
+ "grad_norm": 23160.578125,
+ "learning_rate": 2.779071190207046e-05,
+ "loss": 0.4063,
+ "step": 149000
+ },
+ {
+ "epoch": 0.7690085181688259,
+ "grad_norm": 25806.732421875,
+ "learning_rate": 2.7757989205559142e-05,
+ "loss": 0.4112,
+ "step": 149050
+ },
+ {
+ "epoch": 0.7692664881514387,
+ "grad_norm": 21389.734375,
+ "learning_rate": 2.7725278381402524e-05,
+ "loss": 0.4104,
+ "step": 149100
+ },
+ {
+ "epoch": 0.7695244581340516,
+ "grad_norm": 23550.23828125,
+ "learning_rate": 2.769257944706098e-05,
+ "loss": 0.4121,
+ "step": 149150
+ },
+ {
+ "epoch": 0.7697824281166643,
+ "grad_norm": 21442.373046875,
+ "learning_rate": 2.765989241998854e-05,
+ "loss": 0.4087,
+ "step": 149200
+ },
+ {
+ "epoch": 0.7700403980992772,
+ "grad_norm": 23958.978515625,
+ "learning_rate": 2.7627217317632993e-05,
+ "loss": 0.4136,
+ "step": 149250
+ },
+ {
+ "epoch": 0.77029836808189,
+ "grad_norm": 22143.07421875,
+ "learning_rate": 2.759455415743556e-05,
+ "loss": 0.41,
+ "step": 149300
+ },
+ {
+ "epoch": 0.7705563380645029,
+ "grad_norm": 22873.86328125,
+ "learning_rate": 2.7561902956831294e-05,
+ "loss": 0.4094,
+ "step": 149350
+ },
+ {
+ "epoch": 0.7708143080471156,
+ "grad_norm": 22419.3046875,
+ "learning_rate": 2.7529263733248734e-05,
+ "loss": 0.4133,
+ "step": 149400
+ },
+ {
+ "epoch": 0.7710722780297284,
+ "grad_norm": 22167.474609375,
+ "learning_rate": 2.7496636504110075e-05,
+ "loss": 0.4181,
+ "step": 149450
+ },
+ {
+ "epoch": 0.7713302480123413,
+ "grad_norm": 25449.96875,
+ "learning_rate": 2.74640212868311e-05,
+ "loss": 0.412,
+ "step": 149500
+ },
+ {
+ "epoch": 0.7715882179949541,
+ "grad_norm": 22876.767578125,
+ "learning_rate": 2.7431418098821154e-05,
+ "loss": 0.4087,
+ "step": 149550
+ },
+ {
+ "epoch": 0.7718461879775669,
+ "grad_norm": 25600.65625,
+ "learning_rate": 2.7398826957483235e-05,
+ "loss": 0.4133,
+ "step": 149600
+ },
+ {
+ "epoch": 0.7721041579601797,
+ "grad_norm": 21764.0,
+ "learning_rate": 2.7366247880213834e-05,
+ "loss": 0.4073,
+ "step": 149650
+ },
+ {
+ "epoch": 0.7723621279427926,
+ "grad_norm": 21836.0625,
+ "learning_rate": 2.7333680884403046e-05,
+ "loss": 0.4165,
+ "step": 149700
+ },
+ {
+ "epoch": 0.7726200979254054,
+ "grad_norm": 22049.466796875,
+ "learning_rate": 2.7301125987434496e-05,
+ "loss": 0.4104,
+ "step": 149750
+ },
+ {
+ "epoch": 0.7728780679080183,
+ "grad_norm": 25398.28515625,
+ "learning_rate": 2.7268583206685348e-05,
+ "loss": 0.4036,
+ "step": 149800
+ },
+ {
+ "epoch": 0.773136037890631,
+ "grad_norm": 22303.654296875,
+ "learning_rate": 2.72360525595263e-05,
+ "loss": 0.4077,
+ "step": 149850
+ },
+ {
+ "epoch": 0.7733940078732439,
+ "grad_norm": 24734.65234375,
+ "learning_rate": 2.7203534063321633e-05,
+ "loss": 0.409,
+ "step": 149900
+ },
+ {
+ "epoch": 0.7736519778558567,
+ "grad_norm": 22068.283203125,
+ "learning_rate": 2.7171027735429023e-05,
+ "loss": 0.4148,
+ "step": 149950
+ },
+ {
+ "epoch": 0.7739099478384696,
+ "grad_norm": 23250.4921875,
+ "learning_rate": 2.7138533593199766e-05,
+ "loss": 0.4062,
+ "step": 150000
+ },
+ {
+ "epoch": 0.7739099478384696,
+ "eval_loss": 0.3953176736831665,
+ "eval_runtime": 3196.6561,
+ "eval_samples_per_second": 970.114,
+ "eval_steps_per_second": 1.895,
+ "step": 150000
+ },
+ {
+ "epoch": 0.7741679178210823,
+ "grad_norm": 26452.75390625,
+ "learning_rate": 2.710605165397859e-05,
+ "loss": 0.4098,
+ "step": 150050
+ },
+ {
+ "epoch": 0.7744258878036951,
+ "grad_norm": 23934.783203125,
+ "learning_rate": 2.707358193510371e-05,
+ "loss": 0.4113,
+ "step": 150100
+ },
+ {
+ "epoch": 0.774683857786308,
+ "grad_norm": 22443.591796875,
+ "learning_rate": 2.7041124453906884e-05,
+ "loss": 0.4119,
+ "step": 150150
+ },
+ {
+ "epoch": 0.7749418277689208,
+ "grad_norm": 23333.529296875,
+ "learning_rate": 2.7008679227713214e-05,
+ "loss": 0.4029,
+ "step": 150200
+ },
+ {
+ "epoch": 0.7751997977515336,
+ "grad_norm": 22431.576171875,
+ "learning_rate": 2.6976246273841388e-05,
+ "loss": 0.4045,
+ "step": 150250
+ },
+ {
+ "epoch": 0.7754577677341464,
+ "grad_norm": 26959.68359375,
+ "learning_rate": 2.694382560960348e-05,
+ "loss": 0.4072,
+ "step": 150300
+ },
+ {
+ "epoch": 0.7757157377167593,
+ "grad_norm": 21064.66015625,
+ "learning_rate": 2.6911417252304994e-05,
+ "loss": 0.411,
+ "step": 150350
+ },
+ {
+ "epoch": 0.7759737076993721,
+ "grad_norm": 23242.583984375,
+ "learning_rate": 2.6879021219244906e-05,
+ "loss": 0.4075,
+ "step": 150400
+ },
+ {
+ "epoch": 0.776231677681985,
+ "grad_norm": 24738.037109375,
+ "learning_rate": 2.6846637527715546e-05,
+ "loss": 0.4069,
+ "step": 150450
+ },
+ {
+ "epoch": 0.7764896476645977,
+ "grad_norm": 23944.759765625,
+ "learning_rate": 2.681426619500277e-05,
+ "loss": 0.403,
+ "step": 150500
+ },
+ {
+ "epoch": 0.7767476176472106,
+ "grad_norm": 22064.611328125,
+ "learning_rate": 2.678190723838572e-05,
+ "loss": 0.4045,
+ "step": 150550
+ },
+ {
+ "epoch": 0.7770055876298234,
+ "grad_norm": 24025.298828125,
+ "learning_rate": 2.6749560675137002e-05,
+ "loss": 0.4087,
+ "step": 150600
+ },
+ {
+ "epoch": 0.7772635576124363,
+ "grad_norm": 20863.119140625,
+ "learning_rate": 2.6717226522522553e-05,
+ "loss": 0.4087,
+ "step": 150650
+ },
+ {
+ "epoch": 0.777521527595049,
+ "grad_norm": 24537.642578125,
+ "learning_rate": 2.668490479780179e-05,
+ "loss": 0.4127,
+ "step": 150700
+ },
+ {
+ "epoch": 0.7777794975776618,
+ "grad_norm": 24400.193359375,
+ "learning_rate": 2.665259551822733e-05,
+ "loss": 0.4066,
+ "step": 150750
+ },
+ {
+ "epoch": 0.7780374675602747,
+ "grad_norm": 25251.81640625,
+ "learning_rate": 2.6620298701045322e-05,
+ "loss": 0.4111,
+ "step": 150800
+ },
+ {
+ "epoch": 0.7782954375428875,
+ "grad_norm": 23078.0,
+ "learning_rate": 2.658801436349511e-05,
+ "loss": 0.4109,
+ "step": 150850
+ },
+ {
+ "epoch": 0.7785534075255003,
+ "grad_norm": 20437.556640625,
+ "learning_rate": 2.655574252280949e-05,
+ "loss": 0.4096,
+ "step": 150900
+ },
+ {
+ "epoch": 0.7788113775081131,
+ "grad_norm": 24091.796875,
+ "learning_rate": 2.652348319621457e-05,
+ "loss": 0.4097,
+ "step": 150950
+ },
+ {
+ "epoch": 0.779069347490726,
+ "grad_norm": 22893.6640625,
+ "learning_rate": 2.6491236400929686e-05,
+ "loss": 0.4093,
+ "step": 151000
+ },
+ {
+ "epoch": 0.7793273174733388,
+ "grad_norm": 22871.80859375,
+ "learning_rate": 2.645900215416761e-05,
+ "loss": 0.407,
+ "step": 151050
+ },
+ {
+ "epoch": 0.7795852874559517,
+ "grad_norm": 21766.30078125,
+ "learning_rate": 2.642678047313435e-05,
+ "loss": 0.4071,
+ "step": 151100
+ },
+ {
+ "epoch": 0.7798432574385644,
+ "grad_norm": 24945.544921875,
+ "learning_rate": 2.639457137502919e-05,
+ "loss": 0.4073,
+ "step": 151150
+ },
+ {
+ "epoch": 0.7801012274211773,
+ "grad_norm": 22374.009765625,
+ "learning_rate": 2.636237487704475e-05,
+ "loss": 0.409,
+ "step": 151200
+ },
+ {
+ "epoch": 0.7803591974037901,
+ "grad_norm": 23499.08984375,
+ "learning_rate": 2.6330190996366875e-05,
+ "loss": 0.4087,
+ "step": 151250
+ },
+ {
+ "epoch": 0.780617167386403,
+ "grad_norm": 24672.017578125,
+ "learning_rate": 2.629801975017469e-05,
+ "loss": 0.4075,
+ "step": 151300
+ },
+ {
+ "epoch": 0.7808751373690157,
+ "grad_norm": 23105.05078125,
+ "learning_rate": 2.6265861155640626e-05,
+ "loss": 0.4031,
+ "step": 151350
+ },
+ {
+ "epoch": 0.7811331073516286,
+ "grad_norm": 23226.171875,
+ "learning_rate": 2.6233715229930282e-05,
+ "loss": 0.4137,
+ "step": 151400
+ },
+ {
+ "epoch": 0.7813910773342414,
+ "grad_norm": 24494.732421875,
+ "learning_rate": 2.620158199020255e-05,
+ "loss": 0.4089,
+ "step": 151450
+ },
+ {
+ "epoch": 0.7816490473168543,
+ "grad_norm": 24024.236328125,
+ "learning_rate": 2.616946145360952e-05,
+ "loss": 0.4084,
+ "step": 151500
+ },
+ {
+ "epoch": 0.781907017299467,
+ "grad_norm": 21957.2265625,
+ "learning_rate": 2.613735363729649e-05,
+ "loss": 0.4079,
+ "step": 151550
+ },
+ {
+ "epoch": 0.7821649872820798,
+ "grad_norm": 22637.291015625,
+ "learning_rate": 2.6105258558402056e-05,
+ "loss": 0.4093,
+ "step": 151600
+ },
+ {
+ "epoch": 0.7824229572646927,
+ "grad_norm": 27436.56640625,
+ "learning_rate": 2.607317623405787e-05,
+ "loss": 0.4054,
+ "step": 151650
+ },
+ {
+ "epoch": 0.7826809272473055,
+ "grad_norm": 21909.509765625,
+ "learning_rate": 2.6041106681388922e-05,
+ "loss": 0.4052,
+ "step": 151700
+ },
+ {
+ "epoch": 0.7829388972299183,
+ "grad_norm": 22887.494140625,
+ "learning_rate": 2.6009049917513283e-05,
+ "loss": 0.408,
+ "step": 151750
+ },
+ {
+ "epoch": 0.7831968672125311,
+ "grad_norm": 20771.53125,
+ "learning_rate": 2.5977005959542222e-05,
+ "loss": 0.4052,
+ "step": 151800
+ },
+ {
+ "epoch": 0.783454837195144,
+ "grad_norm": 22012.322265625,
+ "learning_rate": 2.5944974824580244e-05,
+ "loss": 0.4053,
+ "step": 151850
+ },
+ {
+ "epoch": 0.7837128071777568,
+ "grad_norm": 25365.822265625,
+ "learning_rate": 2.5912956529724865e-05,
+ "loss": 0.4141,
+ "step": 151900
+ },
+ {
+ "epoch": 0.7839707771603697,
+ "grad_norm": 23211.658203125,
+ "learning_rate": 2.5880951092066885e-05,
+ "loss": 0.4094,
+ "step": 151950
+ },
+ {
+ "epoch": 0.7842287471429824,
+ "grad_norm": 21514.79296875,
+ "learning_rate": 2.584895852869018e-05,
+ "loss": 0.4056,
+ "step": 152000
+ },
+ {
+ "epoch": 0.7844867171255953,
+ "grad_norm": 23275.76953125,
+ "learning_rate": 2.581697885667176e-05,
+ "loss": 0.4076,
+ "step": 152050
+ },
+ {
+ "epoch": 0.7847446871082081,
+ "grad_norm": 24080.478515625,
+ "learning_rate": 2.578501209308174e-05,
+ "loss": 0.409,
+ "step": 152100
+ },
+ {
+ "epoch": 0.785002657090821,
+ "grad_norm": 23384.275390625,
+ "learning_rate": 2.5753058254983376e-05,
+ "loss": 0.4063,
+ "step": 152150
+ },
+ {
+ "epoch": 0.7852606270734337,
+ "grad_norm": 22736.451171875,
+ "learning_rate": 2.572111735943298e-05,
+ "loss": 0.4054,
+ "step": 152200
+ },
+ {
+ "epoch": 0.7855185970560465,
+ "grad_norm": 24730.462890625,
+ "learning_rate": 2.568918942348002e-05,
+ "loss": 0.4074,
+ "step": 152250
+ },
+ {
+ "epoch": 0.7857765670386594,
+ "grad_norm": 23020.759765625,
+ "learning_rate": 2.5657274464166996e-05,
+ "loss": 0.4143,
+ "step": 152300
+ },
+ {
+ "epoch": 0.7860345370212722,
+ "grad_norm": 22263.357421875,
+ "learning_rate": 2.56253724985295e-05,
+ "loss": 0.4075,
+ "step": 152350
+ },
+ {
+ "epoch": 0.786292507003885,
+ "grad_norm": 23515.408203125,
+ "learning_rate": 2.5593483543596165e-05,
+ "loss": 0.4055,
+ "step": 152400
+ },
+ {
+ "epoch": 0.7865504769864978,
+ "grad_norm": 21960.447265625,
+ "learning_rate": 2.55616076163887e-05,
+ "loss": 0.407,
+ "step": 152450
+ },
+ {
+ "epoch": 0.7868084469691107,
+ "grad_norm": 26880.94140625,
+ "learning_rate": 2.55297447339219e-05,
+ "loss": 0.4029,
+ "step": 152500
+ },
+ {
+ "epoch": 0.7870664169517235,
+ "grad_norm": 22276.259765625,
+ "learning_rate": 2.5497894913203492e-05,
+ "loss": 0.4038,
+ "step": 152550
+ },
+ {
+ "epoch": 0.7873243869343364,
+ "grad_norm": 22566.541015625,
+ "learning_rate": 2.5466058171234336e-05,
+ "loss": 0.4055,
+ "step": 152600
+ },
+ {
+ "epoch": 0.7875823569169491,
+ "grad_norm": 24620.486328125,
+ "learning_rate": 2.543423452500826e-05,
+ "loss": 0.4031,
+ "step": 152650
+ },
+ {
+ "epoch": 0.787840326899562,
+ "grad_norm": 24162.99609375,
+ "learning_rate": 2.540242399151208e-05,
+ "loss": 0.4075,
+ "step": 152700
+ },
+ {
+ "epoch": 0.7880982968821748,
+ "grad_norm": 25309.958984375,
+ "learning_rate": 2.537062658772572e-05,
+ "loss": 0.4052,
+ "step": 152750
+ },
+ {
+ "epoch": 0.7883562668647877,
+ "grad_norm": 22024.390625,
+ "learning_rate": 2.533884233062192e-05,
+ "loss": 0.4036,
+ "step": 152800
+ },
+ {
+ "epoch": 0.7886142368474004,
+ "grad_norm": 22356.041015625,
+ "learning_rate": 2.530707123716657e-05,
+ "loss": 0.4065,
+ "step": 152850
+ },
+ {
+ "epoch": 0.7888722068300132,
+ "grad_norm": 22957.642578125,
+ "learning_rate": 2.527531332431844e-05,
+ "loss": 0.403,
+ "step": 152900
+ },
+ {
+ "epoch": 0.7891301768126261,
+ "grad_norm": 22161.298828125,
+ "learning_rate": 2.52435686090293e-05,
+ "loss": 0.4046,
+ "step": 152950
+ },
+ {
+ "epoch": 0.7893881467952389,
+ "grad_norm": 22849.720703125,
+ "learning_rate": 2.5211837108243847e-05,
+ "loss": 0.4045,
+ "step": 153000
+ },
+ {
+ "epoch": 0.7896461167778517,
+ "grad_norm": 25891.248046875,
+ "learning_rate": 2.5180118838899756e-05,
+ "loss": 0.4083,
+ "step": 153050
+ },
+ {
+ "epoch": 0.7899040867604645,
+ "grad_norm": 23150.634765625,
+ "learning_rate": 2.5148413817927598e-05,
+ "loss": 0.4104,
+ "step": 153100
+ },
+ {
+ "epoch": 0.7901620567430774,
+ "grad_norm": 23457.515625,
+ "learning_rate": 2.511672206225094e-05,
+ "loss": 0.4101,
+ "step": 153150
+ },
+ {
+ "epoch": 0.7904200267256902,
+ "grad_norm": 21316.8828125,
+ "learning_rate": 2.508504358878621e-05,
+ "loss": 0.4091,
+ "step": 153200
+ },
+ {
+ "epoch": 0.7906779967083031,
+ "grad_norm": 25747.87109375,
+ "learning_rate": 2.5053378414442748e-05,
+ "loss": 0.4131,
+ "step": 153250
+ },
+ {
+ "epoch": 0.7909359666909158,
+ "grad_norm": 21499.56640625,
+ "learning_rate": 2.502172655612286e-05,
+ "loss": 0.4028,
+ "step": 153300
+ },
+ {
+ "epoch": 0.7911939366735287,
+ "grad_norm": 22949.970703125,
+ "learning_rate": 2.499008803072162e-05,
+ "loss": 0.4078,
+ "step": 153350
+ },
+ {
+ "epoch": 0.7914519066561415,
+ "grad_norm": 26207.181640625,
+ "learning_rate": 2.495846285512714e-05,
+ "loss": 0.4064,
+ "step": 153400
+ },
+ {
+ "epoch": 0.7917098766387544,
+ "grad_norm": 25037.625,
+ "learning_rate": 2.4926851046220246e-05,
+ "loss": 0.4067,
+ "step": 153450
+ },
+ {
+ "epoch": 0.7919678466213671,
+ "grad_norm": 24114.482421875,
+ "learning_rate": 2.4895252620874775e-05,
+ "loss": 0.4123,
+ "step": 153500
+ },
+ {
+ "epoch": 0.79222581660398,
+ "grad_norm": 24953.568359375,
+ "learning_rate": 2.4863667595957325e-05,
+ "loss": 0.4083,
+ "step": 153550
+ },
+ {
+ "epoch": 0.7924837865865928,
+ "grad_norm": 24928.2265625,
+ "learning_rate": 2.483209598832736e-05,
+ "loss": 0.4066,
+ "step": 153600
+ },
+ {
+ "epoch": 0.7927417565692056,
+ "grad_norm": 24045.166015625,
+ "learning_rate": 2.4800537814837227e-05,
+ "loss": 0.4056,
+ "step": 153650
+ },
+ {
+ "epoch": 0.7929997265518184,
+ "grad_norm": 24591.826171875,
+ "learning_rate": 2.476899309233205e-05,
+ "loss": 0.4094,
+ "step": 153700
+ },
+ {
+ "epoch": 0.7932576965344312,
+ "grad_norm": 23336.810546875,
+ "learning_rate": 2.4737461837649782e-05,
+ "loss": 0.41,
+ "step": 153750
+ },
+ {
+ "epoch": 0.7935156665170441,
+ "grad_norm": 23454.171875,
+ "learning_rate": 2.4705944067621216e-05,
+ "loss": 0.4068,
+ "step": 153800
+ },
+ {
+ "epoch": 0.7937736364996569,
+ "grad_norm": 25322.201171875,
+ "learning_rate": 2.467443979906991e-05,
+ "loss": 0.4097,
+ "step": 153850
+ },
+ {
+ "epoch": 0.7940316064822697,
+ "grad_norm": 24731.580078125,
+ "learning_rate": 2.464294904881222e-05,
+ "loss": 0.4028,
+ "step": 153900
+ },
+ {
+ "epoch": 0.7942895764648825,
+ "grad_norm": 21753.568359375,
+ "learning_rate": 2.4611471833657356e-05,
+ "loss": 0.4148,
+ "step": 153950
+ },
+ {
+ "epoch": 0.7945475464474954,
+ "grad_norm": 26548.966796875,
+ "learning_rate": 2.458000817040717e-05,
+ "loss": 0.4074,
+ "step": 154000
+ },
+ {
+ "epoch": 0.7948055164301082,
+ "grad_norm": 21149.470703125,
+ "learning_rate": 2.4548558075856414e-05,
+ "loss": 0.408,
+ "step": 154050
+ },
+ {
+ "epoch": 0.7950634864127211,
+ "grad_norm": 25742.859375,
+ "learning_rate": 2.4517121566792517e-05,
+ "loss": 0.405,
+ "step": 154100
+ },
+ {
+ "epoch": 0.7953214563953338,
+ "grad_norm": 20954.91796875,
+ "learning_rate": 2.4485698659995658e-05,
+ "loss": 0.3975,
+ "step": 154150
+ },
+ {
+ "epoch": 0.7955794263779467,
+ "grad_norm": 23551.646484375,
+ "learning_rate": 2.445428937223884e-05,
+ "loss": 0.4059,
+ "step": 154200
+ },
+ {
+ "epoch": 0.7958373963605595,
+ "grad_norm": 25214.693359375,
+ "learning_rate": 2.4422893720287654e-05,
+ "loss": 0.4008,
+ "step": 154250
+ },
+ {
+ "epoch": 0.7960953663431724,
+ "grad_norm": 25346.916015625,
+ "learning_rate": 2.4391511720900545e-05,
+ "loss": 0.4035,
+ "step": 154300
+ },
+ {
+ "epoch": 0.7963533363257851,
+ "grad_norm": 21641.23828125,
+ "learning_rate": 2.43601433908286e-05,
+ "loss": 0.4069,
+ "step": 154350
+ },
+ {
+ "epoch": 0.7966113063083979,
+ "grad_norm": 22860.998046875,
+ "learning_rate": 2.4328788746815628e-05,
+ "loss": 0.4022,
+ "step": 154400
+ },
+ {
+ "epoch": 0.7968692762910108,
+ "grad_norm": 21989.96484375,
+ "learning_rate": 2.429744780559813e-05,
+ "loss": 0.4055,
+ "step": 154450
+ },
+ {
+ "epoch": 0.7971272462736236,
+ "grad_norm": 24413.74609375,
+ "learning_rate": 2.4266120583905272e-05,
+ "loss": 0.412,
+ "step": 154500
+ },
+ {
+ "epoch": 0.7973852162562364,
+ "grad_norm": 24805.859375,
+ "learning_rate": 2.4234807098458957e-05,
+ "loss": 0.41,
+ "step": 154550
+ },
+ {
+ "epoch": 0.7976431862388492,
+ "grad_norm": 23658.326171875,
+ "learning_rate": 2.42035073659737e-05,
+ "loss": 0.41,
+ "step": 154600
+ },
+ {
+ "epoch": 0.7979011562214621,
+ "grad_norm": 25225.228515625,
+ "learning_rate": 2.417222140315669e-05,
+ "loss": 0.4069,
+ "step": 154650
+ },
+ {
+ "epoch": 0.7981591262040749,
+ "grad_norm": 23417.3828125,
+ "learning_rate": 2.414094922670777e-05,
+ "loss": 0.4102,
+ "step": 154700
+ },
+ {
+ "epoch": 0.7984170961866878,
+ "grad_norm": 25014.5078125,
+ "learning_rate": 2.4109690853319422e-05,
+ "loss": 0.412,
+ "step": 154750
+ },
+ {
+ "epoch": 0.7986750661693005,
+ "grad_norm": 25523.3125,
+ "learning_rate": 2.407844629967674e-05,
+ "loss": 0.4102,
+ "step": 154800
+ },
+ {
+ "epoch": 0.7989330361519134,
+ "grad_norm": 23173.44921875,
+ "learning_rate": 2.404721558245752e-05,
+ "loss": 0.407,
+ "step": 154850
+ },
+ {
+ "epoch": 0.7991910061345262,
+ "grad_norm": 24673.5078125,
+ "learning_rate": 2.401599871833204e-05,
+ "loss": 0.4054,
+ "step": 154900
+ },
+ {
+ "epoch": 0.799448976117139,
+ "grad_norm": 24709.765625,
+ "learning_rate": 2.398479572396331e-05,
+ "loss": 0.4097,
+ "step": 154950
+ },
+ {
+ "epoch": 0.7997069460997518,
+ "grad_norm": 22404.29296875,
+ "learning_rate": 2.395360661600687e-05,
+ "loss": 0.4072,
+ "step": 155000
+ },
+ {
+ "epoch": 0.7997069460997518,
+ "eval_loss": 0.39372530579566956,
+ "eval_runtime": 3195.8879,
+ "eval_samples_per_second": 970.347,
+ "eval_steps_per_second": 1.895,
+ "step": 155000
+ },
+ {
+ "epoch": 0.7999649160823646,
+ "grad_norm": 24004.09375,
+ "learning_rate": 2.3922431411110834e-05,
+ "loss": 0.4016,
+ "step": 155050
+ },
+ {
+ "epoch": 0.8002228860649775,
+ "grad_norm": 25013.6484375,
+ "learning_rate": 2.3891270125915992e-05,
+ "loss": 0.4068,
+ "step": 155100
+ },
+ {
+ "epoch": 0.8004808560475903,
+ "grad_norm": 23532.982421875,
+ "learning_rate": 2.3860122777055553e-05,
+ "loss": 0.4036,
+ "step": 155150
+ },
+ {
+ "epoch": 0.8007388260302031,
+ "grad_norm": 27413.044921875,
+ "learning_rate": 2.3828989381155426e-05,
+ "loss": 0.4098,
+ "step": 155200
+ },
+ {
+ "epoch": 0.8009967960128159,
+ "grad_norm": 25821.794921875,
+ "learning_rate": 2.379786995483399e-05,
+ "loss": 0.4076,
+ "step": 155250
+ },
+ {
+ "epoch": 0.8012547659954288,
+ "grad_norm": 23864.154296875,
+ "learning_rate": 2.37667645147022e-05,
+ "loss": 0.4082,
+ "step": 155300
+ },
+ {
+ "epoch": 0.8015127359780416,
+ "grad_norm": 22892.451171875,
+ "learning_rate": 2.3735673077363534e-05,
+ "loss": 0.4116,
+ "step": 155350
+ },
+ {
+ "epoch": 0.8017707059606545,
+ "grad_norm": 24638.51953125,
+ "learning_rate": 2.3704595659413987e-05,
+ "loss": 0.4015,
+ "step": 155400
+ },
+ {
+ "epoch": 0.8020286759432672,
+ "grad_norm": 23007.734375,
+ "learning_rate": 2.3673532277442112e-05,
+ "loss": 0.4075,
+ "step": 155450
+ },
+ {
+ "epoch": 0.8022866459258801,
+ "grad_norm": 25629.17578125,
+ "learning_rate": 2.364248294802892e-05,
+ "loss": 0.4031,
+ "step": 155500
+ },
+ {
+ "epoch": 0.8025446159084929,
+ "grad_norm": 23949.939453125,
+ "learning_rate": 2.3611447687747955e-05,
+ "loss": 0.4091,
+ "step": 155550
+ },
+ {
+ "epoch": 0.8028025858911058,
+ "grad_norm": 23120.3515625,
+ "learning_rate": 2.3580426513165228e-05,
+ "loss": 0.4106,
+ "step": 155600
+ },
+ {
+ "epoch": 0.8030605558737185,
+ "grad_norm": 26965.955078125,
+ "learning_rate": 2.3549419440839236e-05,
+ "loss": 0.4054,
+ "step": 155650
+ },
+ {
+ "epoch": 0.8033185258563313,
+ "grad_norm": 23370.33984375,
+ "learning_rate": 2.3518426487320948e-05,
+ "loss": 0.407,
+ "step": 155700
+ },
+ {
+ "epoch": 0.8035764958389442,
+ "grad_norm": 22571.12890625,
+ "learning_rate": 2.3487447669153833e-05,
+ "loss": 0.4118,
+ "step": 155750
+ },
+ {
+ "epoch": 0.803834465821557,
+ "grad_norm": 24092.56640625,
+ "learning_rate": 2.3456483002873768e-05,
+ "loss": 0.4053,
+ "step": 155800
+ },
+ {
+ "epoch": 0.8040924358041698,
+ "grad_norm": 24549.140625,
+ "learning_rate": 2.3425532505009072e-05,
+ "loss": 0.405,
+ "step": 155850
+ },
+ {
+ "epoch": 0.8043504057867826,
+ "grad_norm": 23510.904296875,
+ "learning_rate": 2.3394596192080574e-05,
+ "loss": 0.4049,
+ "step": 155900
+ },
+ {
+ "epoch": 0.8046083757693955,
+ "grad_norm": 23147.369140625,
+ "learning_rate": 2.3363674080601416e-05,
+ "loss": 0.4032,
+ "step": 155950
+ },
+ {
+ "epoch": 0.8048663457520083,
+ "grad_norm": 21877.10546875,
+ "learning_rate": 2.3332766187077264e-05,
+ "loss": 0.4006,
+ "step": 156000
+ },
+ {
+ "epoch": 0.8051243157346211,
+ "grad_norm": 24041.384765625,
+ "learning_rate": 2.330187252800614e-05,
+ "loss": 0.4056,
+ "step": 156050
+ },
+ {
+ "epoch": 0.8053822857172339,
+ "grad_norm": 23452.453125,
+ "learning_rate": 2.327099311987848e-05,
+ "loss": 0.4071,
+ "step": 156100
+ },
+ {
+ "epoch": 0.8056402556998468,
+ "grad_norm": 23023.5859375,
+ "learning_rate": 2.3240127979177123e-05,
+ "loss": 0.4095,
+ "step": 156150
+ },
+ {
+ "epoch": 0.8058982256824596,
+ "grad_norm": 23684.615234375,
+ "learning_rate": 2.3209277122377255e-05,
+ "loss": 0.4023,
+ "step": 156200
+ },
+ {
+ "epoch": 0.8061561956650725,
+ "grad_norm": 22598.732421875,
+ "learning_rate": 2.31784405659465e-05,
+ "loss": 0.4013,
+ "step": 156250
+ },
+ {
+ "epoch": 0.8064141656476852,
+ "grad_norm": 21835.93359375,
+ "learning_rate": 2.3147618326344804e-05,
+ "loss": 0.4072,
+ "step": 156300
+ },
+ {
+ "epoch": 0.806672135630298,
+ "grad_norm": 26343.41015625,
+ "learning_rate": 2.311681042002448e-05,
+ "loss": 0.4154,
+ "step": 156350
+ },
+ {
+ "epoch": 0.8069301056129109,
+ "grad_norm": 24116.162109375,
+ "learning_rate": 2.3086016863430193e-05,
+ "loss": 0.4032,
+ "step": 156400
+ },
+ {
+ "epoch": 0.8071880755955237,
+ "grad_norm": 23874.53515625,
+ "learning_rate": 2.3055237672998946e-05,
+ "loss": 0.4063,
+ "step": 156450
+ },
+ {
+ "epoch": 0.8074460455781365,
+ "grad_norm": 25624.203125,
+ "learning_rate": 2.302447286516006e-05,
+ "loss": 0.4034,
+ "step": 156500
+ },
+ {
+ "epoch": 0.8077040155607493,
+ "grad_norm": 22652.2109375,
+ "learning_rate": 2.2993722456335236e-05,
+ "loss": 0.4049,
+ "step": 156550
+ },
+ {
+ "epoch": 0.8079619855433622,
+ "grad_norm": 26234.255859375,
+ "learning_rate": 2.2962986462938385e-05,
+ "loss": 0.4035,
+ "step": 156600
+ },
+ {
+ "epoch": 0.808219955525975,
+ "grad_norm": 24374.974609375,
+ "learning_rate": 2.293226490137584e-05,
+ "loss": 0.4052,
+ "step": 156650
+ },
+ {
+ "epoch": 0.8084779255085878,
+ "grad_norm": 24195.4296875,
+ "learning_rate": 2.2901557788046146e-05,
+ "loss": 0.4072,
+ "step": 156700
+ },
+ {
+ "epoch": 0.8087358954912006,
+ "grad_norm": 24590.525390625,
+ "learning_rate": 2.2870865139340165e-05,
+ "loss": 0.4092,
+ "step": 156750
+ },
+ {
+ "epoch": 0.8089938654738135,
+ "grad_norm": 20863.509765625,
+ "learning_rate": 2.2840186971641083e-05,
+ "loss": 0.4073,
+ "step": 156800
+ },
+ {
+ "epoch": 0.8092518354564263,
+ "grad_norm": 23662.16015625,
+ "learning_rate": 2.2809523301324238e-05,
+ "loss": 0.4101,
+ "step": 156850
+ },
+ {
+ "epoch": 0.8095098054390392,
+ "grad_norm": 21700.666015625,
+ "learning_rate": 2.2778874144757357e-05,
+ "loss": 0.4075,
+ "step": 156900
+ },
+ {
+ "epoch": 0.8097677754216519,
+ "grad_norm": 29026.71484375,
+ "learning_rate": 2.274823951830036e-05,
+ "loss": 0.4005,
+ "step": 156950
+ },
+ {
+ "epoch": 0.8100257454042648,
+ "grad_norm": 27310.48828125,
+ "learning_rate": 2.2717619438305397e-05,
+ "loss": 0.4058,
+ "step": 157000
+ },
+ {
+ "epoch": 0.8102837153868776,
+ "grad_norm": 25008.673828125,
+ "learning_rate": 2.2687013921116895e-05,
+ "loss": 0.404,
+ "step": 157050
+ },
+ {
+ "epoch": 0.8105416853694904,
+ "grad_norm": 22623.57421875,
+ "learning_rate": 2.2656422983071452e-05,
+ "loss": 0.4059,
+ "step": 157100
+ },
+ {
+ "epoch": 0.8107996553521032,
+ "grad_norm": 23960.427734375,
+ "learning_rate": 2.2625846640497965e-05,
+ "loss": 0.4096,
+ "step": 157150
+ },
+ {
+ "epoch": 0.811057625334716,
+ "grad_norm": 22415.021484375,
+ "learning_rate": 2.2595284909717475e-05,
+ "loss": 0.4061,
+ "step": 157200
+ },
+ {
+ "epoch": 0.8113155953173289,
+ "grad_norm": 23358.822265625,
+ "learning_rate": 2.2564737807043233e-05,
+ "loss": 0.4003,
+ "step": 157250
+ },
+ {
+ "epoch": 0.8115735652999417,
+ "grad_norm": 21686.9765625,
+ "learning_rate": 2.2534205348780702e-05,
+ "loss": 0.4063,
+ "step": 157300
+ },
+ {
+ "epoch": 0.8118315352825545,
+ "grad_norm": 22949.484375,
+ "learning_rate": 2.2503687551227504e-05,
+ "loss": 0.407,
+ "step": 157350
+ },
+ {
+ "epoch": 0.8120895052651673,
+ "grad_norm": 21776.201171875,
+ "learning_rate": 2.2473184430673444e-05,
+ "loss": 0.4073,
+ "step": 157400
+ },
+ {
+ "epoch": 0.8123474752477802,
+ "grad_norm": 25641.17578125,
+ "learning_rate": 2.244269600340055e-05,
+ "loss": 0.4074,
+ "step": 157450
+ },
+ {
+ "epoch": 0.812605445230393,
+ "grad_norm": 22723.42578125,
+ "learning_rate": 2.2412222285682867e-05,
+ "loss": 0.4119,
+ "step": 157500
+ },
+ {
+ "epoch": 0.8128634152130059,
+ "grad_norm": 24244.48046875,
+ "learning_rate": 2.2381763293786746e-05,
+ "loss": 0.4157,
+ "step": 157550
+ },
+ {
+ "epoch": 0.8131213851956186,
+ "grad_norm": 26826.337890625,
+ "learning_rate": 2.235131904397058e-05,
+ "loss": 0.4102,
+ "step": 157600
+ },
+ {
+ "epoch": 0.8133793551782315,
+ "grad_norm": 23157.0546875,
+ "learning_rate": 2.232088955248491e-05,
+ "loss": 0.4121,
+ "step": 157650
+ },
+ {
+ "epoch": 0.8136373251608443,
+ "grad_norm": 23352.009765625,
+ "learning_rate": 2.229047483557245e-05,
+ "loss": 0.4054,
+ "step": 157700
+ },
+ {
+ "epoch": 0.8138952951434572,
+ "grad_norm": 24417.2734375,
+ "learning_rate": 2.2260074909467925e-05,
+ "loss": 0.4092,
+ "step": 157750
+ },
+ {
+ "epoch": 0.8141532651260699,
+ "grad_norm": 22345.669921875,
+ "learning_rate": 2.2229689790398283e-05,
+ "loss": 0.402,
+ "step": 157800
+ },
+ {
+ "epoch": 0.8144112351086827,
+ "grad_norm": 22904.20703125,
+ "learning_rate": 2.2199319494582492e-05,
+ "loss": 0.4067,
+ "step": 157850
+ },
+ {
+ "epoch": 0.8146692050912956,
+ "grad_norm": 24132.306640625,
+ "learning_rate": 2.216896403823162e-05,
+ "loss": 0.4094,
+ "step": 157900
+ },
+ {
+ "epoch": 0.8149271750739084,
+ "grad_norm": 24649.001953125,
+ "learning_rate": 2.2138623437548833e-05,
+ "loss": 0.4048,
+ "step": 157950
+ },
+ {
+ "epoch": 0.8151851450565212,
+ "grad_norm": 24956.458984375,
+ "learning_rate": 2.210829770872933e-05,
+ "loss": 0.4038,
+ "step": 158000
+ },
+ {
+ "epoch": 0.815443115039134,
+ "grad_norm": 24047.3515625,
+ "learning_rate": 2.2077986867960437e-05,
+ "loss": 0.407,
+ "step": 158050
+ },
+ {
+ "epoch": 0.8157010850217469,
+ "grad_norm": 22895.953125,
+ "learning_rate": 2.2047690931421476e-05,
+ "loss": 0.4033,
+ "step": 158100
+ },
+ {
+ "epoch": 0.8159590550043597,
+ "grad_norm": 22524.640625,
+ "learning_rate": 2.201740991528383e-05,
+ "loss": 0.4136,
+ "step": 158150
+ },
+ {
+ "epoch": 0.8162170249869725,
+ "grad_norm": 22507.46875,
+ "learning_rate": 2.1987143835710928e-05,
+ "loss": 0.4043,
+ "step": 158200
+ },
+ {
+ "epoch": 0.8164749949695853,
+ "grad_norm": 24044.5390625,
+ "learning_rate": 2.1956892708858202e-05,
+ "loss": 0.4099,
+ "step": 158250
+ },
+ {
+ "epoch": 0.8167329649521982,
+ "grad_norm": 26112.05859375,
+ "learning_rate": 2.1926656550873103e-05,
+ "loss": 0.4087,
+ "step": 158300
+ },
+ {
+ "epoch": 0.816990934934811,
+ "grad_norm": 25168.59375,
+ "learning_rate": 2.189643537789517e-05,
+ "loss": 0.4059,
+ "step": 158350
+ },
+ {
+ "epoch": 0.8172489049174239,
+ "grad_norm": 31289.392578125,
+ "learning_rate": 2.1866229206055804e-05,
+ "loss": 0.4048,
+ "step": 158400
+ },
+ {
+ "epoch": 0.8175068749000366,
+ "grad_norm": 27301.970703125,
+ "learning_rate": 2.1836038051478508e-05,
+ "loss": 0.4111,
+ "step": 158450
+ },
+ {
+ "epoch": 0.8177648448826494,
+ "grad_norm": 22742.66015625,
+ "learning_rate": 2.180586193027877e-05,
+ "loss": 0.3998,
+ "step": 158500
+ },
+ {
+ "epoch": 0.8180228148652623,
+ "grad_norm": 26745.51171875,
+ "learning_rate": 2.177570085856395e-05,
+ "loss": 0.4069,
+ "step": 158550
+ },
+ {
+ "epoch": 0.8182807848478751,
+ "grad_norm": 24821.93359375,
+ "learning_rate": 2.1745554852433502e-05,
+ "loss": 0.4057,
+ "step": 158600
+ },
+ {
+ "epoch": 0.8185387548304879,
+ "grad_norm": 24082.908203125,
+ "learning_rate": 2.1715423927978755e-05,
+ "loss": 0.4042,
+ "step": 158650
+ },
+ {
+ "epoch": 0.8187967248131007,
+ "grad_norm": 23584.001953125,
+ "learning_rate": 2.168530810128302e-05,
+ "loss": 0.4062,
+ "step": 158700
+ },
+ {
+ "epoch": 0.8190546947957136,
+ "grad_norm": 25795.326171875,
+ "learning_rate": 2.1655207388421532e-05,
+ "loss": 0.4101,
+ "step": 158750
+ },
+ {
+ "epoch": 0.8193126647783264,
+ "grad_norm": 22298.908203125,
+ "learning_rate": 2.1625121805461483e-05,
+ "loss": 0.4004,
+ "step": 158800
+ },
+ {
+ "epoch": 0.8195706347609392,
+ "grad_norm": 24439.970703125,
+ "learning_rate": 2.1595051368461943e-05,
+ "loss": 0.4078,
+ "step": 158850
+ },
+ {
+ "epoch": 0.819828604743552,
+ "grad_norm": 24895.5546875,
+ "learning_rate": 2.1564996093473975e-05,
+ "loss": 0.4008,
+ "step": 158900
+ },
+ {
+ "epoch": 0.8200865747261649,
+ "grad_norm": 27615.1171875,
+ "learning_rate": 2.153495599654048e-05,
+ "loss": 0.4051,
+ "step": 158950
+ },
+ {
+ "epoch": 0.8203445447087777,
+ "grad_norm": 22537.25390625,
+ "learning_rate": 2.150493109369628e-05,
+ "loss": 0.4078,
+ "step": 159000
+ },
+ {
+ "epoch": 0.8206025146913906,
+ "grad_norm": 23422.39453125,
+ "learning_rate": 2.1474921400968085e-05,
+ "loss": 0.3999,
+ "step": 159050
+ },
+ {
+ "epoch": 0.8208604846740033,
+ "grad_norm": 24678.099609375,
+ "learning_rate": 2.1444926934374475e-05,
+ "loss": 0.4038,
+ "step": 159100
+ },
+ {
+ "epoch": 0.8211184546566161,
+ "grad_norm": 25680.623046875,
+ "learning_rate": 2.1414947709925963e-05,
+ "loss": 0.4082,
+ "step": 159150
+ },
+ {
+ "epoch": 0.821376424639229,
+ "grad_norm": 26526.724609375,
+ "learning_rate": 2.1384983743624813e-05,
+ "loss": 0.4076,
+ "step": 159200
+ },
+ {
+ "epoch": 0.8216343946218418,
+ "grad_norm": 21391.701171875,
+ "learning_rate": 2.1355035051465265e-05,
+ "loss": 0.4003,
+ "step": 159250
+ },
+ {
+ "epoch": 0.8218923646044546,
+ "grad_norm": 22676.607421875,
+ "learning_rate": 2.1325101649433327e-05,
+ "loss": 0.4087,
+ "step": 159300
+ },
+ {
+ "epoch": 0.8221503345870674,
+ "grad_norm": 23139.802734375,
+ "learning_rate": 2.1295183553506855e-05,
+ "loss": 0.4102,
+ "step": 159350
+ },
+ {
+ "epoch": 0.8224083045696803,
+ "grad_norm": 23598.369140625,
+ "learning_rate": 2.1265280779655593e-05,
+ "loss": 0.4027,
+ "step": 159400
+ },
+ {
+ "epoch": 0.8226662745522931,
+ "grad_norm": 24068.453125,
+ "learning_rate": 2.1235393343841008e-05,
+ "loss": 0.4097,
+ "step": 159450
+ },
+ {
+ "epoch": 0.8229242445349059,
+ "grad_norm": 26833.779296875,
+ "learning_rate": 2.1205521262016476e-05,
+ "loss": 0.4094,
+ "step": 159500
+ },
+ {
+ "epoch": 0.8231822145175187,
+ "grad_norm": 21122.98046875,
+ "learning_rate": 2.1175664550127123e-05,
+ "loss": 0.4074,
+ "step": 159550
+ },
+ {
+ "epoch": 0.8234401845001316,
+ "grad_norm": 24398.310546875,
+ "learning_rate": 2.1145823224109884e-05,
+ "loss": 0.4081,
+ "step": 159600
+ },
+ {
+ "epoch": 0.8236981544827444,
+ "grad_norm": 20830.05078125,
+ "learning_rate": 2.111599729989348e-05,
+ "loss": 0.4031,
+ "step": 159650
+ },
+ {
+ "epoch": 0.8239561244653573,
+ "grad_norm": 24353.29296875,
+ "learning_rate": 2.108618679339841e-05,
+ "loss": 0.4037,
+ "step": 159700
+ },
+ {
+ "epoch": 0.82421409444797,
+ "grad_norm": 22828.130859375,
+ "learning_rate": 2.1056391720536928e-05,
+ "loss": 0.4021,
+ "step": 159750
+ },
+ {
+ "epoch": 0.8244720644305829,
+ "grad_norm": 21661.53515625,
+ "learning_rate": 2.1026612097213106e-05,
+ "loss": 0.4117,
+ "step": 159800
+ },
+ {
+ "epoch": 0.8247300344131957,
+ "grad_norm": 20191.279296875,
+ "learning_rate": 2.0996847939322707e-05,
+ "loss": 0.4088,
+ "step": 159850
+ },
+ {
+ "epoch": 0.8249880043958085,
+ "grad_norm": 23767.8125,
+ "learning_rate": 2.0967099262753258e-05,
+ "loss": 0.4035,
+ "step": 159900
+ },
+ {
+ "epoch": 0.8252459743784213,
+ "grad_norm": 24693.4609375,
+ "learning_rate": 2.093736608338405e-05,
+ "loss": 0.4135,
+ "step": 159950
+ },
+ {
+ "epoch": 0.8255039443610341,
+ "grad_norm": 22759.341796875,
+ "learning_rate": 2.0907648417086027e-05,
+ "loss": 0.4048,
+ "step": 160000
+ },
+ {
+ "epoch": 0.8255039443610341,
+ "eval_loss": 0.3925068974494934,
+ "eval_runtime": 3187.046,
+ "eval_samples_per_second": 973.039,
+ "eval_steps_per_second": 1.901,
+ "step": 160000
+ },
+ {
+ "epoch": 0.825761914343647,
+ "grad_norm": 25066.45703125,
+ "learning_rate": 2.0877946279721983e-05,
+ "loss": 0.4017,
+ "step": 160050
+ },
+ {
+ "epoch": 0.8260198843262598,
+ "grad_norm": 24734.384765625,
+ "learning_rate": 2.084825968714626e-05,
+ "loss": 0.4091,
+ "step": 160100
+ },
+ {
+ "epoch": 0.8262778543088726,
+ "grad_norm": 26498.201171875,
+ "learning_rate": 2.0818588655205045e-05,
+ "loss": 0.4028,
+ "step": 160150
+ },
+ {
+ "epoch": 0.8265358242914854,
+ "grad_norm": 23436.36328125,
+ "learning_rate": 2.0788933199736143e-05,
+ "loss": 0.4019,
+ "step": 160200
+ },
+ {
+ "epoch": 0.8267937942740983,
+ "grad_norm": 23851.89453125,
+ "learning_rate": 2.075929333656904e-05,
+ "loss": 0.4055,
+ "step": 160250
+ },
+ {
+ "epoch": 0.8270517642567111,
+ "grad_norm": 23416.0625,
+ "learning_rate": 2.0729669081524977e-05,
+ "loss": 0.4075,
+ "step": 160300
+ },
+ {
+ "epoch": 0.8273097342393239,
+ "grad_norm": 22208.994140625,
+ "learning_rate": 2.070006045041673e-05,
+ "loss": 0.4047,
+ "step": 160350
+ },
+ {
+ "epoch": 0.8275677042219367,
+ "grad_norm": 21291.3515625,
+ "learning_rate": 2.067046745904888e-05,
+ "loss": 0.405,
+ "step": 160400
+ },
+ {
+ "epoch": 0.8278256742045496,
+ "grad_norm": 24646.279296875,
+ "learning_rate": 2.0640890123217565e-05,
+ "loss": 0.4076,
+ "step": 160450
+ },
+ {
+ "epoch": 0.8280836441871624,
+ "grad_norm": 22018.609375,
+ "learning_rate": 2.0611328458710595e-05,
+ "loss": 0.406,
+ "step": 160500
+ },
+ {
+ "epoch": 0.8283416141697753,
+ "grad_norm": 30070.40234375,
+ "learning_rate": 2.0581782481307415e-05,
+ "loss": 0.4099,
+ "step": 160550
+ },
+ {
+ "epoch": 0.828599584152388,
+ "grad_norm": 24574.34375,
+ "learning_rate": 2.0552252206779098e-05,
+ "loss": 0.4035,
+ "step": 160600
+ },
+ {
+ "epoch": 0.8288575541350008,
+ "grad_norm": 23137.224609375,
+ "learning_rate": 2.0522737650888313e-05,
+ "loss": 0.4006,
+ "step": 160650
+ },
+ {
+ "epoch": 0.8291155241176137,
+ "grad_norm": 22633.23828125,
+ "learning_rate": 2.0493238829389393e-05,
+ "loss": 0.4064,
+ "step": 160700
+ },
+ {
+ "epoch": 0.8293734941002265,
+ "grad_norm": 23670.525390625,
+ "learning_rate": 2.046375575802822e-05,
+ "loss": 0.4084,
+ "step": 160750
+ },
+ {
+ "epoch": 0.8296314640828393,
+ "grad_norm": 24236.7890625,
+ "learning_rate": 2.043428845254229e-05,
+ "loss": 0.413,
+ "step": 160800
+ },
+ {
+ "epoch": 0.8298894340654521,
+ "grad_norm": 25734.12890625,
+ "learning_rate": 2.0404836928660676e-05,
+ "loss": 0.3992,
+ "step": 160850
+ },
+ {
+ "epoch": 0.830147404048065,
+ "grad_norm": 23417.83203125,
+ "learning_rate": 2.037540120210401e-05,
+ "loss": 0.4069,
+ "step": 160900
+ },
+ {
+ "epoch": 0.8304053740306778,
+ "grad_norm": 24619.853515625,
+ "learning_rate": 2.0345981288584575e-05,
+ "loss": 0.4002,
+ "step": 160950
+ },
+ {
+ "epoch": 0.8306633440132906,
+ "grad_norm": 21862.111328125,
+ "learning_rate": 2.031657720380608e-05,
+ "loss": 0.4012,
+ "step": 161000
+ },
+ {
+ "epoch": 0.8309213139959034,
+ "grad_norm": 23347.91015625,
+ "learning_rate": 2.0287188963463906e-05,
+ "loss": 0.4061,
+ "step": 161050
+ },
+ {
+ "epoch": 0.8311792839785163,
+ "grad_norm": 25119.107421875,
+ "learning_rate": 2.02578165832449e-05,
+ "loss": 0.4061,
+ "step": 161100
+ },
+ {
+ "epoch": 0.8314372539611291,
+ "grad_norm": 22684.50390625,
+ "learning_rate": 2.0228460078827466e-05,
+ "loss": 0.4062,
+ "step": 161150
+ },
+ {
+ "epoch": 0.831695223943742,
+ "grad_norm": 39309.30859375,
+ "learning_rate": 2.0199119465881565e-05,
+ "loss": 0.4091,
+ "step": 161200
+ },
+ {
+ "epoch": 0.8319531939263547,
+ "grad_norm": 22076.8125,
+ "learning_rate": 2.0169794760068632e-05,
+ "loss": 0.4052,
+ "step": 161250
+ },
+ {
+ "epoch": 0.8322111639089675,
+ "grad_norm": 26682.44140625,
+ "learning_rate": 2.0140485977041636e-05,
+ "loss": 0.405,
+ "step": 161300
+ },
+ {
+ "epoch": 0.8324691338915804,
+ "grad_norm": 24586.09375,
+ "learning_rate": 2.011119313244502e-05,
+ "loss": 0.4066,
+ "step": 161350
+ },
+ {
+ "epoch": 0.8327271038741932,
+ "grad_norm": 26363.5703125,
+ "learning_rate": 2.008191624191475e-05,
+ "loss": 0.4027,
+ "step": 161400
+ },
+ {
+ "epoch": 0.832985073856806,
+ "grad_norm": 24361.9921875,
+ "learning_rate": 2.0052655321078246e-05,
+ "loss": 0.4041,
+ "step": 161450
+ },
+ {
+ "epoch": 0.8332430438394188,
+ "grad_norm": 22026.951171875,
+ "learning_rate": 2.0023410385554466e-05,
+ "loss": 0.4068,
+ "step": 161500
+ },
+ {
+ "epoch": 0.8335010138220317,
+ "grad_norm": 24540.068359375,
+ "learning_rate": 1.9994181450953725e-05,
+ "loss": 0.4036,
+ "step": 161550
+ },
+ {
+ "epoch": 0.8337589838046445,
+ "grad_norm": 25837.857421875,
+ "learning_rate": 1.9964968532877916e-05,
+ "loss": 0.4052,
+ "step": 161600
+ },
+ {
+ "epoch": 0.8340169537872573,
+ "grad_norm": 23252.900390625,
+ "learning_rate": 1.993577164692031e-05,
+ "loss": 0.4021,
+ "step": 161650
+ },
+ {
+ "epoch": 0.8342749237698701,
+ "grad_norm": 25305.177734375,
+ "learning_rate": 1.990659080866562e-05,
+ "loss": 0.4089,
+ "step": 161700
+ },
+ {
+ "epoch": 0.834532893752483,
+ "grad_norm": 25317.89453125,
+ "learning_rate": 1.9877426033690066e-05,
+ "loss": 0.4082,
+ "step": 161750
+ },
+ {
+ "epoch": 0.8347908637350958,
+ "grad_norm": 25872.2109375,
+ "learning_rate": 1.984827733756117e-05,
+ "loss": 0.4021,
+ "step": 161800
+ },
+ {
+ "epoch": 0.8350488337177087,
+ "grad_norm": 23915.955078125,
+ "learning_rate": 1.9819144735837998e-05,
+ "loss": 0.4054,
+ "step": 161850
+ },
+ {
+ "epoch": 0.8353068037003214,
+ "grad_norm": 25145.380859375,
+ "learning_rate": 1.9790028244070946e-05,
+ "loss": 0.4119,
+ "step": 161900
+ },
+ {
+ "epoch": 0.8355647736829342,
+ "grad_norm": 24318.28125,
+ "learning_rate": 1.976092787780184e-05,
+ "loss": 0.4015,
+ "step": 161950
+ },
+ {
+ "epoch": 0.8358227436655471,
+ "grad_norm": 22675.845703125,
+ "learning_rate": 1.973184365256388e-05,
+ "loss": 0.4107,
+ "step": 162000
+ },
+ {
+ "epoch": 0.83608071364816,
+ "grad_norm": 23785.451171875,
+ "learning_rate": 1.9702775583881656e-05,
+ "loss": 0.408,
+ "step": 162050
+ },
+ {
+ "epoch": 0.8363386836307727,
+ "grad_norm": 22790.47265625,
+ "learning_rate": 1.9673723687271174e-05,
+ "loss": 0.406,
+ "step": 162100
+ },
+ {
+ "epoch": 0.8365966536133855,
+ "grad_norm": 24380.498046875,
+ "learning_rate": 1.9644687978239746e-05,
+ "loss": 0.4105,
+ "step": 162150
+ },
+ {
+ "epoch": 0.8368546235959984,
+ "grad_norm": 23812.814453125,
+ "learning_rate": 1.9615668472286085e-05,
+ "loss": 0.4032,
+ "step": 162200
+ },
+ {
+ "epoch": 0.8371125935786112,
+ "grad_norm": 22820.734375,
+ "learning_rate": 1.9586665184900232e-05,
+ "loss": 0.4072,
+ "step": 162250
+ },
+ {
+ "epoch": 0.837370563561224,
+ "grad_norm": 22347.779296875,
+ "learning_rate": 1.955767813156359e-05,
+ "loss": 0.4045,
+ "step": 162300
+ },
+ {
+ "epoch": 0.8376285335438368,
+ "grad_norm": 24328.546875,
+ "learning_rate": 1.9528707327748852e-05,
+ "loss": 0.4097,
+ "step": 162350
+ },
+ {
+ "epoch": 0.8378865035264497,
+ "grad_norm": 23850.13671875,
+ "learning_rate": 1.9499752788920146e-05,
+ "loss": 0.4085,
+ "step": 162400
+ },
+ {
+ "epoch": 0.8381444735090625,
+ "grad_norm": 24967.3203125,
+ "learning_rate": 1.9470814530532756e-05,
+ "loss": 0.4056,
+ "step": 162450
+ },
+ {
+ "epoch": 0.8384024434916753,
+ "grad_norm": 23740.197265625,
+ "learning_rate": 1.9441892568033426e-05,
+ "loss": 0.4112,
+ "step": 162500
+ },
+ {
+ "epoch": 0.8386604134742881,
+ "grad_norm": 26039.447265625,
+ "learning_rate": 1.941298691686012e-05,
+ "loss": 0.405,
+ "step": 162550
+ },
+ {
+ "epoch": 0.838918383456901,
+ "grad_norm": 22781.23828125,
+ "learning_rate": 1.9384097592442102e-05,
+ "loss": 0.4043,
+ "step": 162600
+ },
+ {
+ "epoch": 0.8391763534395138,
+ "grad_norm": 25735.17578125,
+ "learning_rate": 1.935522461019998e-05,
+ "loss": 0.4021,
+ "step": 162650
+ },
+ {
+ "epoch": 0.8394343234221266,
+ "grad_norm": 26452.810546875,
+ "learning_rate": 1.932636798554552e-05,
+ "loss": 0.4093,
+ "step": 162700
+ },
+ {
+ "epoch": 0.8396922934047394,
+ "grad_norm": 24199.3515625,
+ "learning_rate": 1.929752773388189e-05,
+ "loss": 0.4003,
+ "step": 162750
+ },
+ {
+ "epoch": 0.8399502633873522,
+ "grad_norm": 27610.30859375,
+ "learning_rate": 1.9268703870603434e-05,
+ "loss": 0.4035,
+ "step": 162800
+ },
+ {
+ "epoch": 0.8402082333699651,
+ "grad_norm": 23799.3359375,
+ "learning_rate": 1.9239896411095777e-05,
+ "loss": 0.4072,
+ "step": 162850
+ },
+ {
+ "epoch": 0.8404662033525779,
+ "grad_norm": 24182.162109375,
+ "learning_rate": 1.9211105370735784e-05,
+ "loss": 0.4056,
+ "step": 162900
+ },
+ {
+ "epoch": 0.8407241733351907,
+ "grad_norm": 21251.0625,
+ "learning_rate": 1.918233076489153e-05,
+ "loss": 0.4073,
+ "step": 162950
+ },
+ {
+ "epoch": 0.8409821433178035,
+ "grad_norm": 22723.09765625,
+ "learning_rate": 1.9153572608922383e-05,
+ "loss": 0.4041,
+ "step": 163000
+ },
+ {
+ "epoch": 0.8412401133004164,
+ "grad_norm": 23557.125,
+ "learning_rate": 1.9124830918178876e-05,
+ "loss": 0.4064,
+ "step": 163050
+ },
+ {
+ "epoch": 0.8414980832830292,
+ "grad_norm": 24273.71484375,
+ "learning_rate": 1.9096105708002754e-05,
+ "loss": 0.4072,
+ "step": 163100
+ },
+ {
+ "epoch": 0.841756053265642,
+ "grad_norm": 24078.10546875,
+ "learning_rate": 1.9067396993726994e-05,
+ "loss": 0.409,
+ "step": 163150
+ },
+ {
+ "epoch": 0.8420140232482548,
+ "grad_norm": 23370.31640625,
+ "learning_rate": 1.9038704790675738e-05,
+ "loss": 0.4082,
+ "step": 163200
+ },
+ {
+ "epoch": 0.8422719932308677,
+ "grad_norm": 23478.564453125,
+ "learning_rate": 1.901002911416432e-05,
+ "loss": 0.4082,
+ "step": 163250
+ },
+ {
+ "epoch": 0.8425299632134805,
+ "grad_norm": 22697.802734375,
+ "learning_rate": 1.898136997949929e-05,
+ "loss": 0.4107,
+ "step": 163300
+ },
+ {
+ "epoch": 0.8427879331960934,
+ "grad_norm": 25571.9765625,
+ "learning_rate": 1.8952727401978326e-05,
+ "loss": 0.3996,
+ "step": 163350
+ },
+ {
+ "epoch": 0.8430459031787061,
+ "grad_norm": 24950.283203125,
+ "learning_rate": 1.8924101396890264e-05,
+ "loss": 0.403,
+ "step": 163400
+ },
+ {
+ "epoch": 0.8433038731613189,
+ "grad_norm": 22436.380859375,
+ "learning_rate": 1.8895491979515162e-05,
+ "loss": 0.4041,
+ "step": 163450
+ },
+ {
+ "epoch": 0.8435618431439318,
+ "grad_norm": 25954.529296875,
+ "learning_rate": 1.8866899165124097e-05,
+ "loss": 0.4003,
+ "step": 163500
+ },
+ {
+ "epoch": 0.8438198131265446,
+ "grad_norm": 21477.8828125,
+ "learning_rate": 1.883832296897944e-05,
+ "loss": 0.4063,
+ "step": 163550
+ },
+ {
+ "epoch": 0.8440777831091574,
+ "grad_norm": 24669.7890625,
+ "learning_rate": 1.8809763406334535e-05,
+ "loss": 0.4049,
+ "step": 163600
+ },
+ {
+ "epoch": 0.8443357530917702,
+ "grad_norm": 27181.50390625,
+ "learning_rate": 1.878122049243398e-05,
+ "loss": 0.4007,
+ "step": 163650
+ },
+ {
+ "epoch": 0.8445937230743831,
+ "grad_norm": 25191.591796875,
+ "learning_rate": 1.8752694242513408e-05,
+ "loss": 0.4072,
+ "step": 163700
+ },
+ {
+ "epoch": 0.8448516930569959,
+ "grad_norm": 24557.42578125,
+ "learning_rate": 1.872418467179956e-05,
+ "loss": 0.4043,
+ "step": 163750
+ },
+ {
+ "epoch": 0.8451096630396087,
+ "grad_norm": 25135.6328125,
+ "learning_rate": 1.8695691795510335e-05,
+ "loss": 0.4008,
+ "step": 163800
+ },
+ {
+ "epoch": 0.8453676330222215,
+ "grad_norm": 23372.181640625,
+ "learning_rate": 1.8667215628854656e-05,
+ "loss": 0.4073,
+ "step": 163850
+ },
+ {
+ "epoch": 0.8456256030048344,
+ "grad_norm": 23332.65625,
+ "learning_rate": 1.8638756187032554e-05,
+ "loss": 0.3987,
+ "step": 163900
+ },
+ {
+ "epoch": 0.8458835729874472,
+ "grad_norm": 23423.669921875,
+ "learning_rate": 1.861031348523512e-05,
+ "loss": 0.4066,
+ "step": 163950
+ },
+ {
+ "epoch": 0.8461415429700601,
+ "grad_norm": 25873.208984375,
+ "learning_rate": 1.858188753864452e-05,
+ "loss": 0.4015,
+ "step": 164000
+ },
+ {
+ "epoch": 0.8463995129526728,
+ "grad_norm": 24766.4140625,
+ "learning_rate": 1.8553478362433964e-05,
+ "loss": 0.4076,
+ "step": 164050
+ },
+ {
+ "epoch": 0.8466574829352856,
+ "grad_norm": 25044.45703125,
+ "learning_rate": 1.852508597176776e-05,
+ "loss": 0.3972,
+ "step": 164100
+ },
+ {
+ "epoch": 0.8469154529178985,
+ "grad_norm": 23699.478515625,
+ "learning_rate": 1.8496710381801157e-05,
+ "loss": 0.3953,
+ "step": 164150
+ },
+ {
+ "epoch": 0.8471734229005113,
+ "grad_norm": 22853.53125,
+ "learning_rate": 1.8468351607680546e-05,
+ "loss": 0.4095,
+ "step": 164200
+ },
+ {
+ "epoch": 0.8474313928831241,
+ "grad_norm": 21374.96875,
+ "learning_rate": 1.8440009664543267e-05,
+ "loss": 0.4092,
+ "step": 164250
+ },
+ {
+ "epoch": 0.8476893628657369,
+ "grad_norm": 22454.515625,
+ "learning_rate": 1.8411684567517694e-05,
+ "loss": 0.4005,
+ "step": 164300
+ },
+ {
+ "epoch": 0.8479473328483498,
+ "grad_norm": 23134.24609375,
+ "learning_rate": 1.8383376331723258e-05,
+ "loss": 0.4041,
+ "step": 164350
+ },
+ {
+ "epoch": 0.8482053028309626,
+ "grad_norm": 23000.69921875,
+ "learning_rate": 1.835508497227028e-05,
+ "loss": 0.4056,
+ "step": 164400
+ },
+ {
+ "epoch": 0.8484632728135754,
+ "grad_norm": 23213.333984375,
+ "learning_rate": 1.8326810504260194e-05,
+ "loss": 0.4076,
+ "step": 164450
+ },
+ {
+ "epoch": 0.8487212427961882,
+ "grad_norm": 24883.953125,
+ "learning_rate": 1.8298552942785353e-05,
+ "loss": 0.4023,
+ "step": 164500
+ },
+ {
+ "epoch": 0.8489792127788011,
+ "grad_norm": 23075.015625,
+ "learning_rate": 1.827031230292908e-05,
+ "loss": 0.4095,
+ "step": 164550
+ },
+ {
+ "epoch": 0.8492371827614139,
+ "grad_norm": 24055.23828125,
+ "learning_rate": 1.824208859976569e-05,
+ "loss": 0.4034,
+ "step": 164600
+ },
+ {
+ "epoch": 0.8494951527440268,
+ "grad_norm": 24572.919921875,
+ "learning_rate": 1.8213881848360438e-05,
+ "loss": 0.4106,
+ "step": 164650
+ },
+ {
+ "epoch": 0.8497531227266395,
+ "grad_norm": 26111.40234375,
+ "learning_rate": 1.8185692063769566e-05,
+ "loss": 0.4051,
+ "step": 164700
+ },
+ {
+ "epoch": 0.8500110927092523,
+ "grad_norm": 22763.25,
+ "learning_rate": 1.8157519261040222e-05,
+ "loss": 0.4019,
+ "step": 164750
+ },
+ {
+ "epoch": 0.8502690626918652,
+ "grad_norm": 22230.16796875,
+ "learning_rate": 1.8129363455210503e-05,
+ "loss": 0.4085,
+ "step": 164800
+ },
+ {
+ "epoch": 0.850527032674478,
+ "grad_norm": 24729.40234375,
+ "learning_rate": 1.8101224661309435e-05,
+ "loss": 0.4042,
+ "step": 164850
+ },
+ {
+ "epoch": 0.8507850026570908,
+ "grad_norm": 23329.431640625,
+ "learning_rate": 1.807310289435696e-05,
+ "loss": 0.405,
+ "step": 164900
+ },
+ {
+ "epoch": 0.8510429726397036,
+ "grad_norm": 24267.970703125,
+ "learning_rate": 1.8044998169363908e-05,
+ "loss": 0.406,
+ "step": 164950
+ },
+ {
+ "epoch": 0.8513009426223165,
+ "grad_norm": 23587.689453125,
+ "learning_rate": 1.80169105013321e-05,
+ "loss": 0.4069,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8513009426223165,
+ "eval_loss": 0.3912332057952881,
+ "eval_runtime": 3189.1337,
+ "eval_samples_per_second": 972.402,
+ "eval_steps_per_second": 1.899,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8515589126049293,
+ "grad_norm": 23356.634765625,
+ "learning_rate": 1.798883990525412e-05,
+ "loss": 0.4022,
+ "step": 165050
+ },
+ {
+ "epoch": 0.8518168825875421,
+ "grad_norm": 23850.75,
+ "learning_rate": 1.7960786396113542e-05,
+ "loss": 0.3984,
+ "step": 165100
+ },
+ {
+ "epoch": 0.8520748525701549,
+ "grad_norm": 23898.03125,
+ "learning_rate": 1.7932749988884795e-05,
+ "loss": 0.4035,
+ "step": 165150
+ },
+ {
+ "epoch": 0.8523328225527678,
+ "grad_norm": 23517.4453125,
+ "learning_rate": 1.790473069853314e-05,
+ "loss": 0.4061,
+ "step": 165200
+ },
+ {
+ "epoch": 0.8525907925353806,
+ "grad_norm": 24264.568359375,
+ "learning_rate": 1.787672854001478e-05,
+ "loss": 0.4076,
+ "step": 165250
+ },
+ {
+ "epoch": 0.8528487625179934,
+ "grad_norm": 23741.220703125,
+ "learning_rate": 1.7848743528276663e-05,
+ "loss": 0.4063,
+ "step": 165300
+ },
+ {
+ "epoch": 0.8531067325006062,
+ "grad_norm": 25368.697265625,
+ "learning_rate": 1.782077567825669e-05,
+ "loss": 0.4027,
+ "step": 165350
+ },
+ {
+ "epoch": 0.853364702483219,
+ "grad_norm": 21610.12890625,
+ "learning_rate": 1.779282500488355e-05,
+ "loss": 0.4067,
+ "step": 165400
+ },
+ {
+ "epoch": 0.8536226724658319,
+ "grad_norm": 26066.560546875,
+ "learning_rate": 1.7764891523076766e-05,
+ "loss": 0.4091,
+ "step": 165450
+ },
+ {
+ "epoch": 0.8538806424484447,
+ "grad_norm": 22909.5234375,
+ "learning_rate": 1.773697524774669e-05,
+ "loss": 0.4035,
+ "step": 165500
+ },
+ {
+ "epoch": 0.8541386124310575,
+ "grad_norm": 23672.54296875,
+ "learning_rate": 1.7709076193794478e-05,
+ "loss": 0.407,
+ "step": 165550
+ },
+ {
+ "epoch": 0.8543965824136703,
+ "grad_norm": 22466.203125,
+ "learning_rate": 1.7681194376112125e-05,
+ "loss": 0.4057,
+ "step": 165600
+ },
+ {
+ "epoch": 0.8546545523962832,
+ "grad_norm": 23236.4296875,
+ "learning_rate": 1.7653329809582404e-05,
+ "loss": 0.4058,
+ "step": 165650
+ },
+ {
+ "epoch": 0.854912522378896,
+ "grad_norm": 23181.5,
+ "learning_rate": 1.7625482509078873e-05,
+ "loss": 0.4007,
+ "step": 165700
+ },
+ {
+ "epoch": 0.8551704923615088,
+ "grad_norm": 20621.5,
+ "learning_rate": 1.7597652489465877e-05,
+ "loss": 0.4053,
+ "step": 165750
+ },
+ {
+ "epoch": 0.8554284623441216,
+ "grad_norm": 23911.7734375,
+ "learning_rate": 1.756983976559855e-05,
+ "loss": 0.4043,
+ "step": 165800
+ },
+ {
+ "epoch": 0.8556864323267345,
+ "grad_norm": 21440.978515625,
+ "learning_rate": 1.7542044352322768e-05,
+ "loss": 0.4076,
+ "step": 165850
+ },
+ {
+ "epoch": 0.8559444023093473,
+ "grad_norm": 22439.712890625,
+ "learning_rate": 1.7514266264475233e-05,
+ "loss": 0.3999,
+ "step": 165900
+ },
+ {
+ "epoch": 0.8562023722919601,
+ "grad_norm": 24814.876953125,
+ "learning_rate": 1.748650551688328e-05,
+ "loss": 0.405,
+ "step": 165950
+ },
+ {
+ "epoch": 0.8564603422745729,
+ "grad_norm": 21705.185546875,
+ "learning_rate": 1.7458762124365096e-05,
+ "loss": 0.4007,
+ "step": 166000
+ },
+ {
+ "epoch": 0.8567183122571858,
+ "grad_norm": 25317.05078125,
+ "learning_rate": 1.7431036101729604e-05,
+ "loss": 0.4036,
+ "step": 166050
+ },
+ {
+ "epoch": 0.8569762822397986,
+ "grad_norm": 23984.142578125,
+ "learning_rate": 1.7403327463776343e-05,
+ "loss": 0.4027,
+ "step": 166100
+ },
+ {
+ "epoch": 0.8572342522224115,
+ "grad_norm": 24149.794921875,
+ "learning_rate": 1.7375636225295716e-05,
+ "loss": 0.3986,
+ "step": 166150
+ },
+ {
+ "epoch": 0.8574922222050242,
+ "grad_norm": 20085.748046875,
+ "learning_rate": 1.73479624010687e-05,
+ "loss": 0.4032,
+ "step": 166200
+ },
+ {
+ "epoch": 0.857750192187637,
+ "grad_norm": 25550.01171875,
+ "learning_rate": 1.732030600586711e-05,
+ "loss": 0.4067,
+ "step": 166250
+ },
+ {
+ "epoch": 0.8580081621702499,
+ "grad_norm": 23439.69921875,
+ "learning_rate": 1.7292667054453364e-05,
+ "loss": 0.4058,
+ "step": 166300
+ },
+ {
+ "epoch": 0.8582661321528627,
+ "grad_norm": 24064.46484375,
+ "learning_rate": 1.7265045561580606e-05,
+ "loss": 0.406,
+ "step": 166350
+ },
+ {
+ "epoch": 0.8585241021354755,
+ "grad_norm": 27679.162109375,
+ "learning_rate": 1.723744154199264e-05,
+ "loss": 0.403,
+ "step": 166400
+ },
+ {
+ "epoch": 0.8587820721180883,
+ "grad_norm": 21371.59765625,
+ "learning_rate": 1.7209855010423977e-05,
+ "loss": 0.4103,
+ "step": 166450
+ },
+ {
+ "epoch": 0.8590400421007012,
+ "grad_norm": 24340.283203125,
+ "learning_rate": 1.7182285981599766e-05,
+ "loss": 0.4073,
+ "step": 166500
+ },
+ {
+ "epoch": 0.859298012083314,
+ "grad_norm": 22603.62109375,
+ "learning_rate": 1.7154734470235823e-05,
+ "loss": 0.4026,
+ "step": 166550
+ },
+ {
+ "epoch": 0.8595559820659268,
+ "grad_norm": 21442.248046875,
+ "learning_rate": 1.7127200491038607e-05,
+ "loss": 0.4089,
+ "step": 166600
+ },
+ {
+ "epoch": 0.8598139520485396,
+ "grad_norm": 22127.478515625,
+ "learning_rate": 1.7099684058705212e-05,
+ "loss": 0.4073,
+ "step": 166650
+ },
+ {
+ "epoch": 0.8600719220311525,
+ "grad_norm": 37660.0859375,
+ "learning_rate": 1.707218518792342e-05,
+ "loss": 0.404,
+ "step": 166700
+ },
+ {
+ "epoch": 0.8603298920137653,
+ "grad_norm": 23772.982421875,
+ "learning_rate": 1.704470389337153e-05,
+ "loss": 0.4004,
+ "step": 166750
+ },
+ {
+ "epoch": 0.8605878619963782,
+ "grad_norm": 24957.23828125,
+ "learning_rate": 1.7017240189718575e-05,
+ "loss": 0.4025,
+ "step": 166800
+ },
+ {
+ "epoch": 0.8608458319789909,
+ "grad_norm": 25014.044921875,
+ "learning_rate": 1.6989794091624138e-05,
+ "loss": 0.4037,
+ "step": 166850
+ },
+ {
+ "epoch": 0.8611038019616037,
+ "grad_norm": 23370.162109375,
+ "learning_rate": 1.696236561373839e-05,
+ "loss": 0.4043,
+ "step": 166900
+ },
+ {
+ "epoch": 0.8613617719442166,
+ "grad_norm": 25212.830078125,
+ "learning_rate": 1.693495477070217e-05,
+ "loss": 0.3997,
+ "step": 166950
+ },
+ {
+ "epoch": 0.8616197419268294,
+ "grad_norm": 22828.701171875,
+ "learning_rate": 1.69075615771468e-05,
+ "loss": 0.4063,
+ "step": 167000
+ },
+ {
+ "epoch": 0.8618777119094422,
+ "grad_norm": 23862.4375,
+ "learning_rate": 1.6880186047694274e-05,
+ "loss": 0.4044,
+ "step": 167050
+ },
+ {
+ "epoch": 0.862135681892055,
+ "grad_norm": 25248.44140625,
+ "learning_rate": 1.685282819695711e-05,
+ "loss": 0.4072,
+ "step": 167100
+ },
+ {
+ "epoch": 0.8623936518746679,
+ "grad_norm": 24765.2421875,
+ "learning_rate": 1.68254880395384e-05,
+ "loss": 0.4055,
+ "step": 167150
+ },
+ {
+ "epoch": 0.8626516218572807,
+ "grad_norm": 22687.32421875,
+ "learning_rate": 1.6798165590031783e-05,
+ "loss": 0.4076,
+ "step": 167200
+ },
+ {
+ "epoch": 0.8629095918398935,
+ "grad_norm": 28427.16015625,
+ "learning_rate": 1.677086086302146e-05,
+ "loss": 0.3985,
+ "step": 167250
+ },
+ {
+ "epoch": 0.8631675618225063,
+ "grad_norm": 24114.146484375,
+ "learning_rate": 1.6743573873082147e-05,
+ "loss": 0.3993,
+ "step": 167300
+ },
+ {
+ "epoch": 0.8634255318051192,
+ "grad_norm": 22007.857421875,
+ "learning_rate": 1.6716304634779144e-05,
+ "loss": 0.4054,
+ "step": 167350
+ },
+ {
+ "epoch": 0.863683501787732,
+ "grad_norm": 24888.619140625,
+ "learning_rate": 1.6689053162668226e-05,
+ "loss": 0.3983,
+ "step": 167400
+ },
+ {
+ "epoch": 0.8639414717703447,
+ "grad_norm": 23306.1640625,
+ "learning_rate": 1.6661819471295704e-05,
+ "loss": 0.3985,
+ "step": 167450
+ },
+ {
+ "epoch": 0.8641994417529576,
+ "grad_norm": 25983.62109375,
+ "learning_rate": 1.6634603575198387e-05,
+ "loss": 0.4033,
+ "step": 167500
+ },
+ {
+ "epoch": 0.8644574117355704,
+ "grad_norm": 21851.826171875,
+ "learning_rate": 1.6607405488903582e-05,
+ "loss": 0.4067,
+ "step": 167550
+ },
+ {
+ "epoch": 0.8647153817181833,
+ "grad_norm": 23041.548828125,
+ "learning_rate": 1.6580225226929152e-05,
+ "loss": 0.4054,
+ "step": 167600
+ },
+ {
+ "epoch": 0.8649733517007961,
+ "grad_norm": 24893.72265625,
+ "learning_rate": 1.655306280378333e-05,
+ "loss": 0.4081,
+ "step": 167650
+ },
+ {
+ "epoch": 0.8652313216834089,
+ "grad_norm": 24462.869140625,
+ "learning_rate": 1.6525918233964933e-05,
+ "loss": 0.4093,
+ "step": 167700
+ },
+ {
+ "epoch": 0.8654892916660217,
+ "grad_norm": 20188.037109375,
+ "learning_rate": 1.6498791531963197e-05,
+ "loss": 0.3986,
+ "step": 167750
+ },
+ {
+ "epoch": 0.8657472616486346,
+ "grad_norm": 24806.51171875,
+ "learning_rate": 1.6471682712257812e-05,
+ "loss": 0.3988,
+ "step": 167800
+ },
+ {
+ "epoch": 0.8660052316312474,
+ "grad_norm": 21647.11328125,
+ "learning_rate": 1.6444591789318992e-05,
+ "loss": 0.4083,
+ "step": 167850
+ },
+ {
+ "epoch": 0.8662632016138602,
+ "grad_norm": 22894.3515625,
+ "learning_rate": 1.6417518777607277e-05,
+ "loss": 0.4004,
+ "step": 167900
+ },
+ {
+ "epoch": 0.866521171596473,
+ "grad_norm": 23173.974609375,
+ "learning_rate": 1.6390463691573765e-05,
+ "loss": 0.409,
+ "step": 167950
+ },
+ {
+ "epoch": 0.8667791415790859,
+ "grad_norm": 24268.001953125,
+ "learning_rate": 1.6363426545659927e-05,
+ "loss": 0.4021,
+ "step": 168000
+ },
+ {
+ "epoch": 0.8670371115616987,
+ "grad_norm": 23466.482421875,
+ "learning_rate": 1.6336407354297667e-05,
+ "loss": 0.4067,
+ "step": 168050
+ },
+ {
+ "epoch": 0.8672950815443115,
+ "grad_norm": 22965.560546875,
+ "learning_rate": 1.6309406131909298e-05,
+ "loss": 0.4127,
+ "step": 168100
+ },
+ {
+ "epoch": 0.8675530515269243,
+ "grad_norm": 22818.5859375,
+ "learning_rate": 1.6282422892907563e-05,
+ "loss": 0.4107,
+ "step": 168150
+ },
+ {
+ "epoch": 0.8678110215095372,
+ "grad_norm": 23358.80859375,
+ "learning_rate": 1.6255457651695565e-05,
+ "loss": 0.3985,
+ "step": 168200
+ },
+ {
+ "epoch": 0.86806899149215,
+ "grad_norm": 24952.044921875,
+ "learning_rate": 1.6228510422666865e-05,
+ "loss": 0.4021,
+ "step": 168250
+ },
+ {
+ "epoch": 0.8683269614747628,
+ "grad_norm": 23554.359375,
+ "learning_rate": 1.6201581220205353e-05,
+ "loss": 0.4091,
+ "step": 168300
+ },
+ {
+ "epoch": 0.8685849314573756,
+ "grad_norm": 23862.92578125,
+ "learning_rate": 1.6174670058685316e-05,
+ "loss": 0.4009,
+ "step": 168350
+ },
+ {
+ "epoch": 0.8688429014399884,
+ "grad_norm": 23549.693359375,
+ "learning_rate": 1.6147776952471415e-05,
+ "loss": 0.4062,
+ "step": 168400
+ },
+ {
+ "epoch": 0.8691008714226013,
+ "grad_norm": 25237.26953125,
+ "learning_rate": 1.612090191591865e-05,
+ "loss": 0.4009,
+ "step": 168450
+ },
+ {
+ "epoch": 0.8693588414052141,
+ "grad_norm": 24368.298828125,
+ "learning_rate": 1.6094044963372444e-05,
+ "loss": 0.4052,
+ "step": 168500
+ },
+ {
+ "epoch": 0.8696168113878269,
+ "grad_norm": 24438.0,
+ "learning_rate": 1.6067206109168453e-05,
+ "loss": 0.4077,
+ "step": 168550
+ },
+ {
+ "epoch": 0.8698747813704397,
+ "grad_norm": 30002.744140625,
+ "learning_rate": 1.6040385367632786e-05,
+ "loss": 0.4029,
+ "step": 168600
+ },
+ {
+ "epoch": 0.8701327513530526,
+ "grad_norm": 24591.333984375,
+ "learning_rate": 1.6013582753081824e-05,
+ "loss": 0.4019,
+ "step": 168650
+ },
+ {
+ "epoch": 0.8703907213356654,
+ "grad_norm": 24005.166015625,
+ "learning_rate": 1.5986798279822263e-05,
+ "loss": 0.4046,
+ "step": 168700
+ },
+ {
+ "epoch": 0.8706486913182782,
+ "grad_norm": 22198.482421875,
+ "learning_rate": 1.5960031962151167e-05,
+ "loss": 0.4003,
+ "step": 168750
+ },
+ {
+ "epoch": 0.870906661300891,
+ "grad_norm": 23392.919921875,
+ "learning_rate": 1.5933283814355872e-05,
+ "loss": 0.4039,
+ "step": 168800
+ },
+ {
+ "epoch": 0.8711646312835039,
+ "grad_norm": 26185.88671875,
+ "learning_rate": 1.5906553850714003e-05,
+ "loss": 0.4044,
+ "step": 168850
+ },
+ {
+ "epoch": 0.8714226012661167,
+ "grad_norm": 34066.59765625,
+ "learning_rate": 1.5879842085493514e-05,
+ "loss": 0.4068,
+ "step": 168900
+ },
+ {
+ "epoch": 0.8716805712487296,
+ "grad_norm": 21913.802734375,
+ "learning_rate": 1.5853148532952616e-05,
+ "loss": 0.4083,
+ "step": 168950
+ },
+ {
+ "epoch": 0.8719385412313423,
+ "grad_norm": 22491.25390625,
+ "learning_rate": 1.5826473207339802e-05,
+ "loss": 0.4037,
+ "step": 169000
+ },
+ {
+ "epoch": 0.8721965112139551,
+ "grad_norm": 23891.447265625,
+ "learning_rate": 1.579981612289389e-05,
+ "loss": 0.4033,
+ "step": 169050
+ },
+ {
+ "epoch": 0.872454481196568,
+ "grad_norm": 24374.109375,
+ "learning_rate": 1.5773177293843855e-05,
+ "loss": 0.41,
+ "step": 169100
+ },
+ {
+ "epoch": 0.8727124511791808,
+ "grad_norm": 24323.197265625,
+ "learning_rate": 1.574655673440903e-05,
+ "loss": 0.3999,
+ "step": 169150
+ },
+ {
+ "epoch": 0.8729704211617936,
+ "grad_norm": 22040.76171875,
+ "learning_rate": 1.5719954458798943e-05,
+ "loss": 0.3997,
+ "step": 169200
+ },
+ {
+ "epoch": 0.8732283911444064,
+ "grad_norm": 32067.173828125,
+ "learning_rate": 1.5693370481213355e-05,
+ "loss": 0.4028,
+ "step": 169250
+ },
+ {
+ "epoch": 0.8734863611270193,
+ "grad_norm": 27840.97265625,
+ "learning_rate": 1.5666804815842322e-05,
+ "loss": 0.4082,
+ "step": 169300
+ },
+ {
+ "epoch": 0.8737443311096321,
+ "grad_norm": 23976.154296875,
+ "learning_rate": 1.5640257476866033e-05,
+ "loss": 0.4075,
+ "step": 169350
+ },
+ {
+ "epoch": 0.8740023010922449,
+ "grad_norm": 22856.724609375,
+ "learning_rate": 1.5613728478454976e-05,
+ "loss": 0.4033,
+ "step": 169400
+ },
+ {
+ "epoch": 0.8742602710748577,
+ "grad_norm": 22639.69140625,
+ "learning_rate": 1.5587217834769803e-05,
+ "loss": 0.4052,
+ "step": 169450
+ },
+ {
+ "epoch": 0.8745182410574706,
+ "grad_norm": 24272.626953125,
+ "learning_rate": 1.5560725559961386e-05,
+ "loss": 0.4029,
+ "step": 169500
+ },
+ {
+ "epoch": 0.8747762110400834,
+ "grad_norm": 23789.333984375,
+ "learning_rate": 1.553425166817079e-05,
+ "loss": 0.4078,
+ "step": 169550
+ },
+ {
+ "epoch": 0.8750341810226961,
+ "grad_norm": 23287.294921875,
+ "learning_rate": 1.5507796173529248e-05,
+ "loss": 0.408,
+ "step": 169600
+ },
+ {
+ "epoch": 0.875292151005309,
+ "grad_norm": 22272.13671875,
+ "learning_rate": 1.548135909015822e-05,
+ "loss": 0.4017,
+ "step": 169650
+ },
+ {
+ "epoch": 0.8755501209879218,
+ "grad_norm": 24645.40234375,
+ "learning_rate": 1.5454940432169297e-05,
+ "loss": 0.4001,
+ "step": 169700
+ },
+ {
+ "epoch": 0.8758080909705347,
+ "grad_norm": 26364.072265625,
+ "learning_rate": 1.5428540213664243e-05,
+ "loss": 0.411,
+ "step": 169750
+ },
+ {
+ "epoch": 0.8760660609531475,
+ "grad_norm": 24535.76171875,
+ "learning_rate": 1.5402158448734987e-05,
+ "loss": 0.4042,
+ "step": 169800
+ },
+ {
+ "epoch": 0.8763240309357603,
+ "grad_norm": 23294.94140625,
+ "learning_rate": 1.53757951514636e-05,
+ "loss": 0.4083,
+ "step": 169850
+ },
+ {
+ "epoch": 0.8765820009183731,
+ "grad_norm": 23390.046875,
+ "learning_rate": 1.5349450335922295e-05,
+ "loss": 0.399,
+ "step": 169900
+ },
+ {
+ "epoch": 0.876839970900986,
+ "grad_norm": 23079.41796875,
+ "learning_rate": 1.5323124016173455e-05,
+ "loss": 0.4078,
+ "step": 169950
+ },
+ {
+ "epoch": 0.8770979408835988,
+ "grad_norm": 24190.23046875,
+ "learning_rate": 1.529681620626951e-05,
+ "loss": 0.4013,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8770979408835988,
+ "eval_loss": 0.39030978083610535,
+ "eval_runtime": 3197.421,
+ "eval_samples_per_second": 969.882,
+ "eval_steps_per_second": 1.894,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8773559108662116,
+ "grad_norm": 24830.658203125,
+ "learning_rate": 1.5270526920253098e-05,
+ "loss": 0.4053,
+ "step": 170050
+ },
+ {
+ "epoch": 0.8776138808488244,
+ "grad_norm": 21314.533203125,
+ "learning_rate": 1.5244256172156923e-05,
+ "loss": 0.4067,
+ "step": 170100
+ },
+ {
+ "epoch": 0.8778718508314373,
+ "grad_norm": 23271.314453125,
+ "learning_rate": 1.521800397600378e-05,
+ "loss": 0.4024,
+ "step": 170150
+ },
+ {
+ "epoch": 0.8781298208140501,
+ "grad_norm": 20112.265625,
+ "learning_rate": 1.5191770345806632e-05,
+ "loss": 0.4001,
+ "step": 170200
+ },
+ {
+ "epoch": 0.8783877907966628,
+ "grad_norm": 23957.087890625,
+ "learning_rate": 1.5165555295568418e-05,
+ "loss": 0.406,
+ "step": 170250
+ },
+ {
+ "epoch": 0.8786457607792757,
+ "grad_norm": 23699.181640625,
+ "learning_rate": 1.5139358839282275e-05,
+ "loss": 0.4005,
+ "step": 170300
+ },
+ {
+ "epoch": 0.8789037307618885,
+ "grad_norm": 23276.4453125,
+ "learning_rate": 1.5113180990931353e-05,
+ "loss": 0.4057,
+ "step": 170350
+ },
+ {
+ "epoch": 0.8791617007445014,
+ "grad_norm": 27051.26171875,
+ "learning_rate": 1.5087021764488867e-05,
+ "loss": 0.4037,
+ "step": 170400
+ },
+ {
+ "epoch": 0.8794196707271142,
+ "grad_norm": 24315.11328125,
+ "learning_rate": 1.5060881173918112e-05,
+ "loss": 0.4004,
+ "step": 170450
+ },
+ {
+ "epoch": 0.879677640709727,
+ "grad_norm": 22589.85546875,
+ "learning_rate": 1.5034759233172419e-05,
+ "loss": 0.402,
+ "step": 170500
+ },
+ {
+ "epoch": 0.8799356106923398,
+ "grad_norm": 24601.666015625,
+ "learning_rate": 1.5008655956195195e-05,
+ "loss": 0.4083,
+ "step": 170550
+ },
+ {
+ "epoch": 0.8801935806749527,
+ "grad_norm": 23203.884765625,
+ "learning_rate": 1.4982571356919862e-05,
+ "loss": 0.3971,
+ "step": 170600
+ },
+ {
+ "epoch": 0.8804515506575655,
+ "grad_norm": 28701.162109375,
+ "learning_rate": 1.4956505449269858e-05,
+ "loss": 0.3989,
+ "step": 170650
+ },
+ {
+ "epoch": 0.8807095206401783,
+ "grad_norm": 23548.541015625,
+ "learning_rate": 1.4930458247158668e-05,
+ "loss": 0.4014,
+ "step": 170700
+ },
+ {
+ "epoch": 0.8809674906227911,
+ "grad_norm": 26836.626953125,
+ "learning_rate": 1.4904429764489792e-05,
+ "loss": 0.3964,
+ "step": 170750
+ },
+ {
+ "epoch": 0.881225460605404,
+ "grad_norm": 23989.537109375,
+ "learning_rate": 1.4878420015156697e-05,
+ "loss": 0.4062,
+ "step": 170800
+ },
+ {
+ "epoch": 0.8814834305880168,
+ "grad_norm": 22008.498046875,
+ "learning_rate": 1.4852429013042945e-05,
+ "loss": 0.4034,
+ "step": 170850
+ },
+ {
+ "epoch": 0.8817414005706296,
+ "grad_norm": 22564.548828125,
+ "learning_rate": 1.4826456772021957e-05,
+ "loss": 0.3953,
+ "step": 170900
+ },
+ {
+ "epoch": 0.8819993705532424,
+ "grad_norm": 20611.005859375,
+ "learning_rate": 1.4800503305957264e-05,
+ "loss": 0.3993,
+ "step": 170950
+ },
+ {
+ "epoch": 0.8822573405358553,
+ "grad_norm": 23731.072265625,
+ "learning_rate": 1.4774568628702312e-05,
+ "loss": 0.4008,
+ "step": 171000
+ },
+ {
+ "epoch": 0.8825153105184681,
+ "grad_norm": 23515.265625,
+ "learning_rate": 1.4748652754100506e-05,
+ "loss": 0.4093,
+ "step": 171050
+ },
+ {
+ "epoch": 0.882773280501081,
+ "grad_norm": 20889.193359375,
+ "learning_rate": 1.4722755695985291e-05,
+ "loss": 0.4036,
+ "step": 171100
+ },
+ {
+ "epoch": 0.8830312504836937,
+ "grad_norm": 23561.208984375,
+ "learning_rate": 1.4696877468179954e-05,
+ "loss": 0.4009,
+ "step": 171150
+ },
+ {
+ "epoch": 0.8832892204663065,
+ "grad_norm": 29216.3046875,
+ "learning_rate": 1.4671018084497828e-05,
+ "loss": 0.4087,
+ "step": 171200
+ },
+ {
+ "epoch": 0.8835471904489194,
+ "grad_norm": 24697.615234375,
+ "learning_rate": 1.4645177558742147e-05,
+ "loss": 0.3976,
+ "step": 171250
+ },
+ {
+ "epoch": 0.8838051604315322,
+ "grad_norm": 30338.123046875,
+ "learning_rate": 1.4619355904706062e-05,
+ "loss": 0.4046,
+ "step": 171300
+ },
+ {
+ "epoch": 0.884063130414145,
+ "grad_norm": 22565.310546875,
+ "learning_rate": 1.4593553136172705e-05,
+ "loss": 0.4011,
+ "step": 171350
+ },
+ {
+ "epoch": 0.8843211003967578,
+ "grad_norm": 23498.0,
+ "learning_rate": 1.4567769266915077e-05,
+ "loss": 0.4071,
+ "step": 171400
+ },
+ {
+ "epoch": 0.8845790703793707,
+ "grad_norm": 23772.279296875,
+ "learning_rate": 1.4542004310696112e-05,
+ "loss": 0.4048,
+ "step": 171450
+ },
+ {
+ "epoch": 0.8848370403619835,
+ "grad_norm": 22418.015625,
+ "learning_rate": 1.4516258281268636e-05,
+ "loss": 0.4009,
+ "step": 171500
+ },
+ {
+ "epoch": 0.8850950103445963,
+ "grad_norm": 25706.166015625,
+ "learning_rate": 1.4490531192375395e-05,
+ "loss": 0.4017,
+ "step": 171550
+ },
+ {
+ "epoch": 0.8853529803272091,
+ "grad_norm": 23563.37890625,
+ "learning_rate": 1.4464823057748982e-05,
+ "loss": 0.4056,
+ "step": 171600
+ },
+ {
+ "epoch": 0.885610950309822,
+ "grad_norm": 23104.65234375,
+ "learning_rate": 1.4439133891111956e-05,
+ "loss": 0.4014,
+ "step": 171650
+ },
+ {
+ "epoch": 0.8858689202924348,
+ "grad_norm": 22858.935546875,
+ "learning_rate": 1.4413463706176627e-05,
+ "loss": 0.4047,
+ "step": 171700
+ },
+ {
+ "epoch": 0.8861268902750475,
+ "grad_norm": 23197.859375,
+ "learning_rate": 1.4387812516645299e-05,
+ "loss": 0.4032,
+ "step": 171750
+ },
+ {
+ "epoch": 0.8863848602576604,
+ "grad_norm": 22323.4609375,
+ "learning_rate": 1.4362180336210057e-05,
+ "loss": 0.4018,
+ "step": 171800
+ },
+ {
+ "epoch": 0.8866428302402732,
+ "grad_norm": 23677.431640625,
+ "learning_rate": 1.433656717855285e-05,
+ "loss": 0.4057,
+ "step": 171850
+ },
+ {
+ "epoch": 0.8869008002228861,
+ "grad_norm": 22975.283203125,
+ "learning_rate": 1.4310973057345538e-05,
+ "loss": 0.4077,
+ "step": 171900
+ },
+ {
+ "epoch": 0.8871587702054989,
+ "grad_norm": 23338.005859375,
+ "learning_rate": 1.4285397986249694e-05,
+ "loss": 0.4037,
+ "step": 171950
+ },
+ {
+ "epoch": 0.8874167401881117,
+ "grad_norm": 22469.08203125,
+ "learning_rate": 1.4259841978916849e-05,
+ "loss": 0.4025,
+ "step": 172000
+ },
+ {
+ "epoch": 0.8876747101707245,
+ "grad_norm": 23508.064453125,
+ "learning_rate": 1.4234305048988288e-05,
+ "loss": 0.3979,
+ "step": 172050
+ },
+ {
+ "epoch": 0.8879326801533374,
+ "grad_norm": 25113.62890625,
+ "learning_rate": 1.4208787210095126e-05,
+ "loss": 0.3988,
+ "step": 172100
+ },
+ {
+ "epoch": 0.8881906501359502,
+ "grad_norm": 23230.75,
+ "learning_rate": 1.4183288475858298e-05,
+ "loss": 0.4029,
+ "step": 172150
+ },
+ {
+ "epoch": 0.888448620118563,
+ "grad_norm": 22058.306640625,
+ "learning_rate": 1.4157808859888516e-05,
+ "loss": 0.4082,
+ "step": 172200
+ },
+ {
+ "epoch": 0.8887065901011758,
+ "grad_norm": 23375.91015625,
+ "learning_rate": 1.4132348375786336e-05,
+ "loss": 0.407,
+ "step": 172250
+ },
+ {
+ "epoch": 0.8889645600837887,
+ "grad_norm": 21199.943359375,
+ "learning_rate": 1.4106907037142059e-05,
+ "loss": 0.4039,
+ "step": 172300
+ },
+ {
+ "epoch": 0.8892225300664015,
+ "grad_norm": 22754.287109375,
+ "learning_rate": 1.4081484857535777e-05,
+ "loss": 0.4,
+ "step": 172350
+ },
+ {
+ "epoch": 0.8894805000490142,
+ "grad_norm": 23116.21484375,
+ "learning_rate": 1.405608185053735e-05,
+ "loss": 0.4026,
+ "step": 172400
+ },
+ {
+ "epoch": 0.8897384700316271,
+ "grad_norm": 22281.65625,
+ "learning_rate": 1.4030698029706423e-05,
+ "loss": 0.3992,
+ "step": 172450
+ },
+ {
+ "epoch": 0.8899964400142399,
+ "grad_norm": 22979.447265625,
+ "learning_rate": 1.400533340859237e-05,
+ "loss": 0.4027,
+ "step": 172500
+ },
+ {
+ "epoch": 0.8902544099968528,
+ "grad_norm": 25733.873046875,
+ "learning_rate": 1.3979988000734373e-05,
+ "loss": 0.4092,
+ "step": 172550
+ },
+ {
+ "epoch": 0.8905123799794656,
+ "grad_norm": 23825.38671875,
+ "learning_rate": 1.395466181966127e-05,
+ "loss": 0.3997,
+ "step": 172600
+ },
+ {
+ "epoch": 0.8907703499620784,
+ "grad_norm": 27504.0703125,
+ "learning_rate": 1.3929354878891715e-05,
+ "loss": 0.403,
+ "step": 172650
+ },
+ {
+ "epoch": 0.8910283199446912,
+ "grad_norm": 28201.208984375,
+ "learning_rate": 1.3904067191934067e-05,
+ "loss": 0.4029,
+ "step": 172700
+ },
+ {
+ "epoch": 0.8912862899273041,
+ "grad_norm": 24115.69140625,
+ "learning_rate": 1.3878798772286377e-05,
+ "loss": 0.3979,
+ "step": 172750
+ },
+ {
+ "epoch": 0.8915442599099169,
+ "grad_norm": 20489.552734375,
+ "learning_rate": 1.3853549633436491e-05,
+ "loss": 0.4001,
+ "step": 172800
+ },
+ {
+ "epoch": 0.8918022298925297,
+ "grad_norm": 23580.583984375,
+ "learning_rate": 1.3828319788861838e-05,
+ "loss": 0.3983,
+ "step": 172850
+ },
+ {
+ "epoch": 0.8920601998751425,
+ "grad_norm": 24172.771484375,
+ "learning_rate": 1.3803109252029678e-05,
+ "loss": 0.4081,
+ "step": 172900
+ },
+ {
+ "epoch": 0.8923181698577554,
+ "grad_norm": 26543.375,
+ "learning_rate": 1.3777918036396887e-05,
+ "loss": 0.4015,
+ "step": 172950
+ },
+ {
+ "epoch": 0.8925761398403682,
+ "grad_norm": 27849.654296875,
+ "learning_rate": 1.3752746155410046e-05,
+ "loss": 0.4045,
+ "step": 173000
+ },
+ {
+ "epoch": 0.892834109822981,
+ "grad_norm": 25752.724609375,
+ "learning_rate": 1.3727593622505424e-05,
+ "loss": 0.4022,
+ "step": 173050
+ },
+ {
+ "epoch": 0.8930920798055938,
+ "grad_norm": 22836.892578125,
+ "learning_rate": 1.3702460451108935e-05,
+ "loss": 0.4015,
+ "step": 173100
+ },
+ {
+ "epoch": 0.8933500497882066,
+ "grad_norm": 26556.62890625,
+ "learning_rate": 1.3677346654636208e-05,
+ "loss": 0.4017,
+ "step": 173150
+ },
+ {
+ "epoch": 0.8936080197708195,
+ "grad_norm": 24310.390625,
+ "learning_rate": 1.3652252246492492e-05,
+ "loss": 0.4015,
+ "step": 173200
+ },
+ {
+ "epoch": 0.8938659897534323,
+ "grad_norm": 23713.0859375,
+ "learning_rate": 1.3627177240072698e-05,
+ "loss": 0.4024,
+ "step": 173250
+ },
+ {
+ "epoch": 0.8941239597360451,
+ "grad_norm": 21189.57421875,
+ "learning_rate": 1.3602121648761373e-05,
+ "loss": 0.4012,
+ "step": 173300
+ },
+ {
+ "epoch": 0.8943819297186579,
+ "grad_norm": 24229.1484375,
+ "learning_rate": 1.3577085485932705e-05,
+ "loss": 0.4105,
+ "step": 173350
+ },
+ {
+ "epoch": 0.8946398997012708,
+ "grad_norm": 23998.22265625,
+ "learning_rate": 1.3552068764950504e-05,
+ "loss": 0.4004,
+ "step": 173400
+ },
+ {
+ "epoch": 0.8948978696838836,
+ "grad_norm": 24751.1171875,
+ "learning_rate": 1.3527071499168253e-05,
+ "loss": 0.4024,
+ "step": 173450
+ },
+ {
+ "epoch": 0.8951558396664964,
+ "grad_norm": 24872.029296875,
+ "learning_rate": 1.3502093701928948e-05,
+ "loss": 0.406,
+ "step": 173500
+ },
+ {
+ "epoch": 0.8954138096491092,
+ "grad_norm": 23180.771484375,
+ "learning_rate": 1.3477135386565297e-05,
+ "loss": 0.4041,
+ "step": 173550
+ },
+ {
+ "epoch": 0.8956717796317221,
+ "grad_norm": 23679.1484375,
+ "learning_rate": 1.3452196566399555e-05,
+ "loss": 0.4095,
+ "step": 173600
+ },
+ {
+ "epoch": 0.8959297496143349,
+ "grad_norm": 26730.537109375,
+ "learning_rate": 1.3427277254743565e-05,
+ "loss": 0.4058,
+ "step": 173650
+ },
+ {
+ "epoch": 0.8961877195969477,
+ "grad_norm": 23320.666015625,
+ "learning_rate": 1.3402377464898813e-05,
+ "loss": 0.4038,
+ "step": 173700
+ },
+ {
+ "epoch": 0.8964456895795605,
+ "grad_norm": 22802.87890625,
+ "learning_rate": 1.3377497210156276e-05,
+ "loss": 0.3977,
+ "step": 173750
+ },
+ {
+ "epoch": 0.8967036595621733,
+ "grad_norm": 21257.22265625,
+ "learning_rate": 1.3352636503796584e-05,
+ "loss": 0.4074,
+ "step": 173800
+ },
+ {
+ "epoch": 0.8969616295447862,
+ "grad_norm": 23935.412109375,
+ "learning_rate": 1.332779535908989e-05,
+ "loss": 0.4021,
+ "step": 173850
+ },
+ {
+ "epoch": 0.8972195995273989,
+ "grad_norm": 21819.267578125,
+ "learning_rate": 1.3302973789295925e-05,
+ "loss": 0.3992,
+ "step": 173900
+ },
+ {
+ "epoch": 0.8974775695100118,
+ "grad_norm": 23360.71875,
+ "learning_rate": 1.327817180766393e-05,
+ "loss": 0.4051,
+ "step": 173950
+ },
+ {
+ "epoch": 0.8977355394926246,
+ "grad_norm": 24474.685546875,
+ "learning_rate": 1.3253389427432772e-05,
+ "loss": 0.4046,
+ "step": 174000
+ },
+ {
+ "epoch": 0.8979935094752375,
+ "grad_norm": 29715.3359375,
+ "learning_rate": 1.3228626661830779e-05,
+ "loss": 0.4037,
+ "step": 174050
+ },
+ {
+ "epoch": 0.8982514794578503,
+ "grad_norm": 23241.20703125,
+ "learning_rate": 1.3203883524075833e-05,
+ "loss": 0.4003,
+ "step": 174100
+ },
+ {
+ "epoch": 0.8985094494404631,
+ "grad_norm": 26005.23828125,
+ "learning_rate": 1.3179160027375347e-05,
+ "loss": 0.3992,
+ "step": 174150
+ },
+ {
+ "epoch": 0.8987674194230759,
+ "grad_norm": 23322.212890625,
+ "learning_rate": 1.3154456184926234e-05,
+ "loss": 0.4037,
+ "step": 174200
+ },
+ {
+ "epoch": 0.8990253894056888,
+ "grad_norm": 22434.90234375,
+ "learning_rate": 1.3129772009914964e-05,
+ "loss": 0.4044,
+ "step": 174250
+ },
+ {
+ "epoch": 0.8992833593883016,
+ "grad_norm": 24753.904296875,
+ "learning_rate": 1.3105107515517418e-05,
+ "loss": 0.4034,
+ "step": 174300
+ },
+ {
+ "epoch": 0.8995413293709144,
+ "grad_norm": 23271.814453125,
+ "learning_rate": 1.3080462714899066e-05,
+ "loss": 0.3992,
+ "step": 174350
+ },
+ {
+ "epoch": 0.8997992993535272,
+ "grad_norm": 23929.7578125,
+ "learning_rate": 1.3055837621214811e-05,
+ "loss": 0.4018,
+ "step": 174400
+ },
+ {
+ "epoch": 0.90005726933614,
+ "grad_norm": 25211.7265625,
+ "learning_rate": 1.3031232247609037e-05,
+ "loss": 0.4052,
+ "step": 174450
+ },
+ {
+ "epoch": 0.9003152393187529,
+ "grad_norm": 24554.791015625,
+ "learning_rate": 1.300664660721566e-05,
+ "loss": 0.3987,
+ "step": 174500
+ },
+ {
+ "epoch": 0.9005732093013656,
+ "grad_norm": 26028.396484375,
+ "learning_rate": 1.2982080713157963e-05,
+ "loss": 0.4032,
+ "step": 174550
+ },
+ {
+ "epoch": 0.9008311792839785,
+ "grad_norm": 24228.72265625,
+ "learning_rate": 1.295753457854878e-05,
+ "loss": 0.4001,
+ "step": 174600
+ },
+ {
+ "epoch": 0.9010891492665913,
+ "grad_norm": 24043.064453125,
+ "learning_rate": 1.293300821649036e-05,
+ "loss": 0.4009,
+ "step": 174650
+ },
+ {
+ "epoch": 0.9013471192492042,
+ "grad_norm": 25628.208984375,
+ "learning_rate": 1.2908501640074388e-05,
+ "loss": 0.4058,
+ "step": 174700
+ },
+ {
+ "epoch": 0.901605089231817,
+ "grad_norm": 23927.81640625,
+ "learning_rate": 1.288401486238201e-05,
+ "loss": 0.4044,
+ "step": 174750
+ },
+ {
+ "epoch": 0.9018630592144298,
+ "grad_norm": 23615.923828125,
+ "learning_rate": 1.2859547896483793e-05,
+ "loss": 0.4042,
+ "step": 174800
+ },
+ {
+ "epoch": 0.9021210291970426,
+ "grad_norm": 24990.158203125,
+ "learning_rate": 1.2835100755439705e-05,
+ "loss": 0.4033,
+ "step": 174850
+ },
+ {
+ "epoch": 0.9023789991796555,
+ "grad_norm": 23908.240234375,
+ "learning_rate": 1.2810673452299194e-05,
+ "loss": 0.404,
+ "step": 174900
+ },
+ {
+ "epoch": 0.9026369691622683,
+ "grad_norm": 24776.828125,
+ "learning_rate": 1.278626600010106e-05,
+ "loss": 0.4017,
+ "step": 174950
+ },
+ {
+ "epoch": 0.9028949391448811,
+ "grad_norm": 23400.912109375,
+ "learning_rate": 1.276187841187354e-05,
+ "loss": 0.4007,
+ "step": 175000
+ },
+ {
+ "epoch": 0.9028949391448811,
+ "eval_loss": 0.389443963766098,
+ "eval_runtime": 3184.6844,
+ "eval_samples_per_second": 973.761,
+ "eval_steps_per_second": 1.902,
+ "step": 175000
+ },
+ {
+ "epoch": 0.9031529091274939,
+ "grad_norm": 23482.337890625,
+ "learning_rate": 1.2737510700634248e-05,
+ "loss": 0.4033,
+ "step": 175050
+ },
+ {
+ "epoch": 0.9034108791101068,
+ "grad_norm": 24351.23828125,
+ "learning_rate": 1.2713162879390183e-05,
+ "loss": 0.4031,
+ "step": 175100
+ },
+ {
+ "epoch": 0.9036688490927196,
+ "grad_norm": 28495.6796875,
+ "learning_rate": 1.2688834961137785e-05,
+ "loss": 0.4057,
+ "step": 175150
+ },
+ {
+ "epoch": 0.9039268190753323,
+ "grad_norm": 23276.583984375,
+ "learning_rate": 1.2664526958862765e-05,
+ "loss": 0.4036,
+ "step": 175200
+ },
+ {
+ "epoch": 0.9041847890579452,
+ "grad_norm": 22784.033203125,
+ "learning_rate": 1.2640238885540312e-05,
+ "loss": 0.4054,
+ "step": 175250
+ },
+ {
+ "epoch": 0.904442759040558,
+ "grad_norm": 22389.21484375,
+ "learning_rate": 1.2615970754134914e-05,
+ "loss": 0.4036,
+ "step": 175300
+ },
+ {
+ "epoch": 0.9047007290231709,
+ "grad_norm": 24767.59375,
+ "learning_rate": 1.2591722577600412e-05,
+ "loss": 0.4055,
+ "step": 175350
+ },
+ {
+ "epoch": 0.9049586990057837,
+ "grad_norm": 24981.552734375,
+ "learning_rate": 1.2567494368880056e-05,
+ "loss": 0.3997,
+ "step": 175400
+ },
+ {
+ "epoch": 0.9052166689883965,
+ "grad_norm": 24523.580078125,
+ "learning_rate": 1.254328614090634e-05,
+ "loss": 0.4009,
+ "step": 175450
+ },
+ {
+ "epoch": 0.9054746389710093,
+ "grad_norm": 29571.404296875,
+ "learning_rate": 1.251909790660119e-05,
+ "loss": 0.4013,
+ "step": 175500
+ },
+ {
+ "epoch": 0.9057326089536222,
+ "grad_norm": 23286.564453125,
+ "learning_rate": 1.24949296788758e-05,
+ "loss": 0.3997,
+ "step": 175550
+ },
+ {
+ "epoch": 0.905990578936235,
+ "grad_norm": 23124.205078125,
+ "learning_rate": 1.247078147063071e-05,
+ "loss": 0.4056,
+ "step": 175600
+ },
+ {
+ "epoch": 0.9062485489188478,
+ "grad_norm": 23467.775390625,
+ "learning_rate": 1.2446653294755755e-05,
+ "loss": 0.3976,
+ "step": 175650
+ },
+ {
+ "epoch": 0.9065065189014606,
+ "grad_norm": 23793.609375,
+ "learning_rate": 1.2422545164130096e-05,
+ "loss": 0.4018,
+ "step": 175700
+ },
+ {
+ "epoch": 0.9067644888840735,
+ "grad_norm": 24439.974609375,
+ "learning_rate": 1.2398457091622167e-05,
+ "loss": 0.4063,
+ "step": 175750
+ },
+ {
+ "epoch": 0.9070224588666863,
+ "grad_norm": 23925.22265625,
+ "learning_rate": 1.2374389090089744e-05,
+ "loss": 0.4039,
+ "step": 175800
+ },
+ {
+ "epoch": 0.907280428849299,
+ "grad_norm": 23174.416015625,
+ "learning_rate": 1.2350341172379853e-05,
+ "loss": 0.4031,
+ "step": 175850
+ },
+ {
+ "epoch": 0.9075383988319119,
+ "grad_norm": 26669.806640625,
+ "learning_rate": 1.2326313351328794e-05,
+ "loss": 0.4031,
+ "step": 175900
+ },
+ {
+ "epoch": 0.9077963688145247,
+ "grad_norm": 21128.041015625,
+ "learning_rate": 1.2302305639762168e-05,
+ "loss": 0.407,
+ "step": 175950
+ },
+ {
+ "epoch": 0.9080543387971376,
+ "grad_norm": 22798.111328125,
+ "learning_rate": 1.2278318050494797e-05,
+ "loss": 0.4035,
+ "step": 176000
+ },
+ {
+ "epoch": 0.9083123087797504,
+ "grad_norm": 23327.587890625,
+ "learning_rate": 1.2254350596330843e-05,
+ "loss": 0.3958,
+ "step": 176050
+ },
+ {
+ "epoch": 0.9085702787623632,
+ "grad_norm": 22225.3125,
+ "learning_rate": 1.2230403290063613e-05,
+ "loss": 0.4074,
+ "step": 176100
+ },
+ {
+ "epoch": 0.908828248744976,
+ "grad_norm": 22727.791015625,
+ "learning_rate": 1.2206476144475754e-05,
+ "loss": 0.4063,
+ "step": 176150
+ },
+ {
+ "epoch": 0.9090862187275889,
+ "grad_norm": 26138.931640625,
+ "learning_rate": 1.2182569172339098e-05,
+ "loss": 0.408,
+ "step": 176200
+ },
+ {
+ "epoch": 0.9093441887102017,
+ "grad_norm": 23436.91796875,
+ "learning_rate": 1.2158682386414716e-05,
+ "loss": 0.4038,
+ "step": 176250
+ },
+ {
+ "epoch": 0.9096021586928145,
+ "grad_norm": 23695.244140625,
+ "learning_rate": 1.2134815799452947e-05,
+ "loss": 0.4074,
+ "step": 176300
+ },
+ {
+ "epoch": 0.9098601286754273,
+ "grad_norm": 25616.240234375,
+ "learning_rate": 1.2110969424193263e-05,
+ "loss": 0.3971,
+ "step": 176350
+ },
+ {
+ "epoch": 0.9101180986580402,
+ "grad_norm": 27326.634765625,
+ "learning_rate": 1.2087143273364431e-05,
+ "loss": 0.4045,
+ "step": 176400
+ },
+ {
+ "epoch": 0.910376068640653,
+ "grad_norm": 23704.775390625,
+ "learning_rate": 1.2063337359684384e-05,
+ "loss": 0.4071,
+ "step": 176450
+ },
+ {
+ "epoch": 0.9106340386232658,
+ "grad_norm": 25532.234375,
+ "learning_rate": 1.2039551695860251e-05,
+ "loss": 0.4021,
+ "step": 176500
+ },
+ {
+ "epoch": 0.9108920086058786,
+ "grad_norm": 25247.884765625,
+ "learning_rate": 1.201578629458835e-05,
+ "loss": 0.4074,
+ "step": 176550
+ },
+ {
+ "epoch": 0.9111499785884914,
+ "grad_norm": 29377.486328125,
+ "learning_rate": 1.1992041168554236e-05,
+ "loss": 0.4064,
+ "step": 176600
+ },
+ {
+ "epoch": 0.9114079485711043,
+ "grad_norm": 22188.34375,
+ "learning_rate": 1.1968316330432527e-05,
+ "loss": 0.404,
+ "step": 176650
+ },
+ {
+ "epoch": 0.911665918553717,
+ "grad_norm": 23766.0546875,
+ "learning_rate": 1.194461179288714e-05,
+ "loss": 0.4016,
+ "step": 176700
+ },
+ {
+ "epoch": 0.9119238885363299,
+ "grad_norm": 21386.623046875,
+ "learning_rate": 1.1920927568571078e-05,
+ "loss": 0.4055,
+ "step": 176750
+ },
+ {
+ "epoch": 0.9121818585189427,
+ "grad_norm": 25873.052734375,
+ "learning_rate": 1.1897263670126507e-05,
+ "loss": 0.3978,
+ "step": 176800
+ },
+ {
+ "epoch": 0.9124398285015556,
+ "grad_norm": 25235.5390625,
+ "learning_rate": 1.1873620110184803e-05,
+ "loss": 0.3975,
+ "step": 176850
+ },
+ {
+ "epoch": 0.9126977984841684,
+ "grad_norm": 22841.5,
+ "learning_rate": 1.1849996901366383e-05,
+ "loss": 0.4031,
+ "step": 176900
+ },
+ {
+ "epoch": 0.9129557684667812,
+ "grad_norm": 21522.388671875,
+ "learning_rate": 1.1826394056280893e-05,
+ "loss": 0.4048,
+ "step": 176950
+ },
+ {
+ "epoch": 0.913213738449394,
+ "grad_norm": 27600.689453125,
+ "learning_rate": 1.1802811587527074e-05,
+ "loss": 0.3984,
+ "step": 177000
+ },
+ {
+ "epoch": 0.9134717084320069,
+ "grad_norm": 24698.60546875,
+ "learning_rate": 1.177924950769278e-05,
+ "loss": 0.406,
+ "step": 177050
+ },
+ {
+ "epoch": 0.9137296784146197,
+ "grad_norm": 27378.033203125,
+ "learning_rate": 1.1755707829355001e-05,
+ "loss": 0.3993,
+ "step": 177100
+ },
+ {
+ "epoch": 0.9139876483972325,
+ "grad_norm": 27578.4296875,
+ "learning_rate": 1.1732186565079805e-05,
+ "loss": 0.3984,
+ "step": 177150
+ },
+ {
+ "epoch": 0.9142456183798453,
+ "grad_norm": 24650.6953125,
+ "learning_rate": 1.1708685727422424e-05,
+ "loss": 0.401,
+ "step": 177200
+ },
+ {
+ "epoch": 0.9145035883624582,
+ "grad_norm": 25550.0859375,
+ "learning_rate": 1.1685205328927135e-05,
+ "loss": 0.399,
+ "step": 177250
+ },
+ {
+ "epoch": 0.914761558345071,
+ "grad_norm": 22760.77734375,
+ "learning_rate": 1.166174538212732e-05,
+ "loss": 0.403,
+ "step": 177300
+ },
+ {
+ "epoch": 0.9150195283276837,
+ "grad_norm": 22038.26171875,
+ "learning_rate": 1.1638305899545443e-05,
+ "loss": 0.4066,
+ "step": 177350
+ },
+ {
+ "epoch": 0.9152774983102966,
+ "grad_norm": 23857.66015625,
+ "learning_rate": 1.1614886893693044e-05,
+ "loss": 0.4038,
+ "step": 177400
+ },
+ {
+ "epoch": 0.9155354682929094,
+ "grad_norm": 24813.55859375,
+ "learning_rate": 1.1591488377070724e-05,
+ "loss": 0.3992,
+ "step": 177450
+ },
+ {
+ "epoch": 0.9157934382755223,
+ "grad_norm": 24467.5859375,
+ "learning_rate": 1.1568110362168199e-05,
+ "loss": 0.4,
+ "step": 177500
+ },
+ {
+ "epoch": 0.9160514082581351,
+ "grad_norm": 22464.98046875,
+ "learning_rate": 1.1544752861464143e-05,
+ "loss": 0.4069,
+ "step": 177550
+ },
+ {
+ "epoch": 0.9163093782407479,
+ "grad_norm": 26591.51171875,
+ "learning_rate": 1.1521415887426379e-05,
+ "loss": 0.4008,
+ "step": 177600
+ },
+ {
+ "epoch": 0.9165673482233607,
+ "grad_norm": 21086.318359375,
+ "learning_rate": 1.1498099452511724e-05,
+ "loss": 0.4036,
+ "step": 177650
+ },
+ {
+ "epoch": 0.9168253182059736,
+ "grad_norm": 24243.072265625,
+ "learning_rate": 1.147480356916602e-05,
+ "loss": 0.4019,
+ "step": 177700
+ },
+ {
+ "epoch": 0.9170832881885864,
+ "grad_norm": 26714.83984375,
+ "learning_rate": 1.1451528249824206e-05,
+ "loss": 0.3978,
+ "step": 177750
+ },
+ {
+ "epoch": 0.9173412581711992,
+ "grad_norm": 24799.712890625,
+ "learning_rate": 1.1428273506910132e-05,
+ "loss": 0.4078,
+ "step": 177800
+ },
+ {
+ "epoch": 0.917599228153812,
+ "grad_norm": 25010.435546875,
+ "learning_rate": 1.1405039352836777e-05,
+ "loss": 0.4054,
+ "step": 177850
+ },
+ {
+ "epoch": 0.9178571981364249,
+ "grad_norm": 23657.78125,
+ "learning_rate": 1.1381825800006068e-05,
+ "loss": 0.4001,
+ "step": 177900
+ },
+ {
+ "epoch": 0.9181151681190377,
+ "grad_norm": 23865.349609375,
+ "learning_rate": 1.1358632860808955e-05,
+ "loss": 0.4012,
+ "step": 177950
+ },
+ {
+ "epoch": 0.9183731381016504,
+ "grad_norm": 26476.04296875,
+ "learning_rate": 1.1335460547625365e-05,
+ "loss": 0.3998,
+ "step": 178000
+ },
+ {
+ "epoch": 0.9186311080842633,
+ "grad_norm": 24907.89453125,
+ "learning_rate": 1.1312308872824235e-05,
+ "loss": 0.401,
+ "step": 178050
+ },
+ {
+ "epoch": 0.9188890780668761,
+ "grad_norm": 24008.54296875,
+ "learning_rate": 1.1289177848763494e-05,
+ "loss": 0.3991,
+ "step": 178100
+ },
+ {
+ "epoch": 0.919147048049489,
+ "grad_norm": 23814.396484375,
+ "learning_rate": 1.1266067487790027e-05,
+ "loss": 0.4039,
+ "step": 178150
+ },
+ {
+ "epoch": 0.9194050180321018,
+ "grad_norm": 25892.994140625,
+ "learning_rate": 1.1242977802239696e-05,
+ "loss": 0.4015,
+ "step": 178200
+ },
+ {
+ "epoch": 0.9196629880147146,
+ "grad_norm": 24185.7265625,
+ "learning_rate": 1.1219908804437328e-05,
+ "loss": 0.3992,
+ "step": 178250
+ },
+ {
+ "epoch": 0.9199209579973274,
+ "grad_norm": 23890.54296875,
+ "learning_rate": 1.1196860506696705e-05,
+ "loss": 0.4087,
+ "step": 178300
+ },
+ {
+ "epoch": 0.9201789279799403,
+ "grad_norm": 25288.83203125,
+ "learning_rate": 1.1173832921320554e-05,
+ "loss": 0.4038,
+ "step": 178350
+ },
+ {
+ "epoch": 0.9204368979625531,
+ "grad_norm": 27609.994140625,
+ "learning_rate": 1.1150826060600594e-05,
+ "loss": 0.4047,
+ "step": 178400
+ },
+ {
+ "epoch": 0.9206948679451659,
+ "grad_norm": 25010.259765625,
+ "learning_rate": 1.112783993681738e-05,
+ "loss": 0.4037,
+ "step": 178450
+ },
+ {
+ "epoch": 0.9209528379277787,
+ "grad_norm": 23663.78515625,
+ "learning_rate": 1.1104874562240514e-05,
+ "loss": 0.396,
+ "step": 178500
+ },
+ {
+ "epoch": 0.9212108079103916,
+ "grad_norm": 24960.072265625,
+ "learning_rate": 1.108192994912844e-05,
+ "loss": 0.4024,
+ "step": 178550
+ },
+ {
+ "epoch": 0.9214687778930044,
+ "grad_norm": 22778.66796875,
+ "learning_rate": 1.1059006109728543e-05,
+ "loss": 0.4039,
+ "step": 178600
+ },
+ {
+ "epoch": 0.9217267478756171,
+ "grad_norm": 20177.640625,
+ "learning_rate": 1.1036103056277165e-05,
+ "loss": 0.4008,
+ "step": 178650
+ },
+ {
+ "epoch": 0.92198471785823,
+ "grad_norm": 25084.703125,
+ "learning_rate": 1.1013220800999452e-05,
+ "loss": 0.4082,
+ "step": 178700
+ },
+ {
+ "epoch": 0.9222426878408428,
+ "grad_norm": 23697.529296875,
+ "learning_rate": 1.0990359356109558e-05,
+ "loss": 0.4083,
+ "step": 178750
+ },
+ {
+ "epoch": 0.9225006578234557,
+ "grad_norm": 26252.25,
+ "learning_rate": 1.0967518733810462e-05,
+ "loss": 0.4114,
+ "step": 178800
+ },
+ {
+ "epoch": 0.9227586278060684,
+ "grad_norm": 25295.103515625,
+ "learning_rate": 1.094469894629403e-05,
+ "loss": 0.4062,
+ "step": 178850
+ },
+ {
+ "epoch": 0.9230165977886813,
+ "grad_norm": 24484.203125,
+ "learning_rate": 1.0921900005741053e-05,
+ "loss": 0.4008,
+ "step": 178900
+ },
+ {
+ "epoch": 0.9232745677712941,
+ "grad_norm": 23360.701171875,
+ "learning_rate": 1.0899121924321154e-05,
+ "loss": 0.405,
+ "step": 178950
+ },
+ {
+ "epoch": 0.923532537753907,
+ "grad_norm": 22507.24609375,
+ "learning_rate": 1.0876364714192822e-05,
+ "loss": 0.3968,
+ "step": 179000
+ },
+ {
+ "epoch": 0.9237905077365198,
+ "grad_norm": 26761.66015625,
+ "learning_rate": 1.0853628387503423e-05,
+ "loss": 0.4021,
+ "step": 179050
+ },
+ {
+ "epoch": 0.9240484777191326,
+ "grad_norm": 26596.376953125,
+ "learning_rate": 1.0830912956389166e-05,
+ "loss": 0.3984,
+ "step": 179100
+ },
+ {
+ "epoch": 0.9243064477017454,
+ "grad_norm": 23996.490234375,
+ "learning_rate": 1.0808218432975093e-05,
+ "loss": 0.3996,
+ "step": 179150
+ },
+ {
+ "epoch": 0.9245644176843583,
+ "grad_norm": 22681.4609375,
+ "learning_rate": 1.0785544829375143e-05,
+ "loss": 0.4021,
+ "step": 179200
+ },
+ {
+ "epoch": 0.9248223876669711,
+ "grad_norm": 25675.728515625,
+ "learning_rate": 1.0762892157691995e-05,
+ "loss": 0.3942,
+ "step": 179250
+ },
+ {
+ "epoch": 0.9250803576495839,
+ "grad_norm": 26039.25,
+ "learning_rate": 1.0740260430017247e-05,
+ "loss": 0.4014,
+ "step": 179300
+ },
+ {
+ "epoch": 0.9253383276321967,
+ "grad_norm": 21596.50390625,
+ "learning_rate": 1.0717649658431256e-05,
+ "loss": 0.4017,
+ "step": 179350
+ },
+ {
+ "epoch": 0.9255962976148095,
+ "grad_norm": 25318.3125,
+ "learning_rate": 1.0695059855003204e-05,
+ "loss": 0.3968,
+ "step": 179400
+ },
+ {
+ "epoch": 0.9258542675974224,
+ "grad_norm": 20999.10546875,
+ "learning_rate": 1.0672491031791137e-05,
+ "loss": 0.4032,
+ "step": 179450
+ },
+ {
+ "epoch": 0.9261122375800351,
+ "grad_norm": 25034.404296875,
+ "learning_rate": 1.0649943200841794e-05,
+ "loss": 0.3987,
+ "step": 179500
+ },
+ {
+ "epoch": 0.926370207562648,
+ "grad_norm": 23470.205078125,
+ "learning_rate": 1.0627416374190819e-05,
+ "loss": 0.4009,
+ "step": 179550
+ },
+ {
+ "epoch": 0.9266281775452608,
+ "grad_norm": 23667.298828125,
+ "learning_rate": 1.0604910563862575e-05,
+ "loss": 0.4022,
+ "step": 179600
+ },
+ {
+ "epoch": 0.9268861475278737,
+ "grad_norm": 25315.5390625,
+ "learning_rate": 1.058242578187023e-05,
+ "loss": 0.4023,
+ "step": 179650
+ },
+ {
+ "epoch": 0.9271441175104865,
+ "grad_norm": 23639.34375,
+ "learning_rate": 1.0559962040215727e-05,
+ "loss": 0.407,
+ "step": 179700
+ },
+ {
+ "epoch": 0.9274020874930993,
+ "grad_norm": 29350.244140625,
+ "learning_rate": 1.0537519350889764e-05,
+ "loss": 0.4063,
+ "step": 179750
+ },
+ {
+ "epoch": 0.9276600574757121,
+ "grad_norm": 26077.30859375,
+ "learning_rate": 1.051509772587183e-05,
+ "loss": 0.4011,
+ "step": 179800
+ },
+ {
+ "epoch": 0.927918027458325,
+ "grad_norm": 22387.8046875,
+ "learning_rate": 1.0492697177130157e-05,
+ "loss": 0.398,
+ "step": 179850
+ },
+ {
+ "epoch": 0.9281759974409378,
+ "grad_norm": 24023.2734375,
+ "learning_rate": 1.0470317716621719e-05,
+ "loss": 0.4026,
+ "step": 179900
+ },
+ {
+ "epoch": 0.9284339674235506,
+ "grad_norm": 24288.666015625,
+ "learning_rate": 1.044795935629223e-05,
+ "loss": 0.403,
+ "step": 179950
+ },
+ {
+ "epoch": 0.9286919374061634,
+ "grad_norm": 26163.923828125,
+ "learning_rate": 1.042562210807616e-05,
+ "loss": 0.4001,
+ "step": 180000
+ },
+ {
+ "epoch": 0.9286919374061634,
+ "eval_loss": 0.3886363208293915,
+ "eval_runtime": 3188.2841,
+ "eval_samples_per_second": 972.661,
+ "eval_steps_per_second": 1.9,
+ "step": 180000
+ },
+ {
+ "epoch": 0.9289499073887763,
+ "grad_norm": 24379.322265625,
+ "learning_rate": 1.0403305983896683e-05,
+ "loss": 0.3978,
+ "step": 180050
+ },
+ {
+ "epoch": 0.9292078773713891,
+ "grad_norm": 23249.939453125,
+ "learning_rate": 1.0381010995665752e-05,
+ "loss": 0.4055,
+ "step": 180100
+ },
+ {
+ "epoch": 0.9294658473540018,
+ "grad_norm": 25460.6875,
+ "learning_rate": 1.0358737155283942e-05,
+ "loss": 0.4059,
+ "step": 180150
+ },
+ {
+ "epoch": 0.9297238173366147,
+ "grad_norm": 23166.548828125,
+ "learning_rate": 1.0336484474640651e-05,
+ "loss": 0.4051,
+ "step": 180200
+ },
+ {
+ "epoch": 0.9299817873192275,
+ "grad_norm": 23631.94921875,
+ "learning_rate": 1.0314252965613908e-05,
+ "loss": 0.3974,
+ "step": 180250
+ },
+ {
+ "epoch": 0.9302397573018404,
+ "grad_norm": 26213.556640625,
+ "learning_rate": 1.0292042640070449e-05,
+ "loss": 0.3983,
+ "step": 180300
+ },
+ {
+ "epoch": 0.9304977272844532,
+ "grad_norm": 24056.875,
+ "learning_rate": 1.0269853509865751e-05,
+ "loss": 0.3979,
+ "step": 180350
+ },
+ {
+ "epoch": 0.930755697267066,
+ "grad_norm": 24793.658203125,
+ "learning_rate": 1.0247685586843897e-05,
+ "loss": 0.3993,
+ "step": 180400
+ },
+ {
+ "epoch": 0.9310136672496788,
+ "grad_norm": 25296.04296875,
+ "learning_rate": 1.0225538882837733e-05,
+ "loss": 0.4047,
+ "step": 180450
+ },
+ {
+ "epoch": 0.9312716372322917,
+ "grad_norm": 21486.990234375,
+ "learning_rate": 1.0203413409668722e-05,
+ "loss": 0.3995,
+ "step": 180500
+ },
+ {
+ "epoch": 0.9315296072149045,
+ "grad_norm": 24168.083984375,
+ "learning_rate": 1.018130917914702e-05,
+ "loss": 0.4081,
+ "step": 180550
+ },
+ {
+ "epoch": 0.9317875771975173,
+ "grad_norm": 25313.568359375,
+ "learning_rate": 1.0159226203071431e-05,
+ "loss": 0.4024,
+ "step": 180600
+ },
+ {
+ "epoch": 0.9320455471801301,
+ "grad_norm": 22535.845703125,
+ "learning_rate": 1.0137164493229411e-05,
+ "loss": 0.3974,
+ "step": 180650
+ },
+ {
+ "epoch": 0.932303517162743,
+ "grad_norm": 24480.0703125,
+ "learning_rate": 1.0115124061397102e-05,
+ "loss": 0.4031,
+ "step": 180700
+ },
+ {
+ "epoch": 0.9325614871453558,
+ "grad_norm": 29667.470703125,
+ "learning_rate": 1.0093104919339241e-05,
+ "loss": 0.3991,
+ "step": 180750
+ },
+ {
+ "epoch": 0.9328194571279685,
+ "grad_norm": 22311.767578125,
+ "learning_rate": 1.0071107078809228e-05,
+ "loss": 0.402,
+ "step": 180800
+ },
+ {
+ "epoch": 0.9330774271105814,
+ "grad_norm": 22752.642578125,
+ "learning_rate": 1.0049130551549068e-05,
+ "loss": 0.4022,
+ "step": 180850
+ },
+ {
+ "epoch": 0.9333353970931942,
+ "grad_norm": 26333.43359375,
+ "learning_rate": 1.0027175349289424e-05,
+ "loss": 0.4006,
+ "step": 180900
+ },
+ {
+ "epoch": 0.9335933670758071,
+ "grad_norm": 22951.927734375,
+ "learning_rate": 1.0005241483749533e-05,
+ "loss": 0.4022,
+ "step": 180950
+ },
+ {
+ "epoch": 0.9338513370584198,
+ "grad_norm": 24532.15625,
+ "learning_rate": 9.983328966637318e-06,
+ "loss": 0.398,
+ "step": 181000
+ },
+ {
+ "epoch": 0.9341093070410327,
+ "grad_norm": 24624.205078125,
+ "learning_rate": 9.961437809649188e-06,
+ "loss": 0.4021,
+ "step": 181050
+ },
+ {
+ "epoch": 0.9343672770236455,
+ "grad_norm": 23679.087890625,
+ "learning_rate": 9.93956802447027e-06,
+ "loss": 0.4038,
+ "step": 181100
+ },
+ {
+ "epoch": 0.9346252470062584,
+ "grad_norm": 22279.52734375,
+ "learning_rate": 9.917719622774219e-06,
+ "loss": 0.3987,
+ "step": 181150
+ },
+ {
+ "epoch": 0.9348832169888712,
+ "grad_norm": 25709.376953125,
+ "learning_rate": 9.895892616223268e-06,
+ "loss": 0.4062,
+ "step": 181200
+ },
+ {
+ "epoch": 0.935141186971484,
+ "grad_norm": 24607.25,
+ "learning_rate": 9.874087016468298e-06,
+ "loss": 0.3973,
+ "step": 181250
+ },
+ {
+ "epoch": 0.9353991569540968,
+ "grad_norm": 25458.861328125,
+ "learning_rate": 9.852302835148652e-06,
+ "loss": 0.3993,
+ "step": 181300
+ },
+ {
+ "epoch": 0.9356571269367097,
+ "grad_norm": 24070.654296875,
+ "learning_rate": 9.830540083892358e-06,
+ "loss": 0.4057,
+ "step": 181350
+ },
+ {
+ "epoch": 0.9359150969193225,
+ "grad_norm": 25323.736328125,
+ "learning_rate": 9.80879877431593e-06,
+ "loss": 0.407,
+ "step": 181400
+ },
+ {
+ "epoch": 0.9361730669019352,
+ "grad_norm": 27513.087890625,
+ "learning_rate": 9.787078918024455e-06,
+ "loss": 0.3979,
+ "step": 181450
+ },
+ {
+ "epoch": 0.9364310368845481,
+ "grad_norm": 22324.669921875,
+ "learning_rate": 9.765380526611568e-06,
+ "loss": 0.3984,
+ "step": 181500
+ },
+ {
+ "epoch": 0.936689006867161,
+ "grad_norm": 23778.37890625,
+ "learning_rate": 9.743703611659465e-06,
+ "loss": 0.4055,
+ "step": 181550
+ },
+ {
+ "epoch": 0.9369469768497738,
+ "grad_norm": 26777.255859375,
+ "learning_rate": 9.722048184738864e-06,
+ "loss": 0.4047,
+ "step": 181600
+ },
+ {
+ "epoch": 0.9372049468323865,
+ "grad_norm": 23210.876953125,
+ "learning_rate": 9.700414257409002e-06,
+ "loss": 0.393,
+ "step": 181650
+ },
+ {
+ "epoch": 0.9374629168149994,
+ "grad_norm": 22539.84765625,
+ "learning_rate": 9.67880184121765e-06,
+ "loss": 0.4069,
+ "step": 181700
+ },
+ {
+ "epoch": 0.9377208867976122,
+ "grad_norm": 25191.609375,
+ "learning_rate": 9.65721094770109e-06,
+ "loss": 0.4069,
+ "step": 181750
+ },
+ {
+ "epoch": 0.9379788567802251,
+ "grad_norm": 23813.578125,
+ "learning_rate": 9.63564158838416e-06,
+ "loss": 0.3954,
+ "step": 181800
+ },
+ {
+ "epoch": 0.9382368267628379,
+ "grad_norm": 23869.703125,
+ "learning_rate": 9.614093774780114e-06,
+ "loss": 0.3998,
+ "step": 181850
+ },
+ {
+ "epoch": 0.9384947967454507,
+ "grad_norm": 23316.384765625,
+ "learning_rate": 9.5925675183908e-06,
+ "loss": 0.3989,
+ "step": 181900
+ },
+ {
+ "epoch": 0.9387527667280635,
+ "grad_norm": 23641.65625,
+ "learning_rate": 9.571062830706496e-06,
+ "loss": 0.4017,
+ "step": 181950
+ },
+ {
+ "epoch": 0.9390107367106764,
+ "grad_norm": 23724.431640625,
+ "learning_rate": 9.549579723205982e-06,
+ "loss": 0.4042,
+ "step": 182000
+ },
+ {
+ "epoch": 0.9392687066932892,
+ "grad_norm": 24013.849609375,
+ "learning_rate": 9.528118207356556e-06,
+ "loss": 0.3966,
+ "step": 182050
+ },
+ {
+ "epoch": 0.939526676675902,
+ "grad_norm": 21843.55859375,
+ "learning_rate": 9.506678294613919e-06,
+ "loss": 0.4051,
+ "step": 182100
+ },
+ {
+ "epoch": 0.9397846466585148,
+ "grad_norm": 22000.7734375,
+ "learning_rate": 9.485259996422313e-06,
+ "loss": 0.4042,
+ "step": 182150
+ },
+ {
+ "epoch": 0.9400426166411276,
+ "grad_norm": 23307.556640625,
+ "learning_rate": 9.463863324214395e-06,
+ "loss": 0.4018,
+ "step": 182200
+ },
+ {
+ "epoch": 0.9403005866237405,
+ "grad_norm": 22961.353515625,
+ "learning_rate": 9.4424882894113e-06,
+ "loss": 0.3991,
+ "step": 182250
+ },
+ {
+ "epoch": 0.9405585566063532,
+ "grad_norm": 24167.134765625,
+ "learning_rate": 9.421134903422607e-06,
+ "loss": 0.4033,
+ "step": 182300
+ },
+ {
+ "epoch": 0.9408165265889661,
+ "grad_norm": 24116.75,
+ "learning_rate": 9.399803177646339e-06,
+ "loss": 0.3979,
+ "step": 182350
+ },
+ {
+ "epoch": 0.9410744965715789,
+ "grad_norm": 25658.6640625,
+ "learning_rate": 9.378493123468946e-06,
+ "loss": 0.4093,
+ "step": 182400
+ },
+ {
+ "epoch": 0.9413324665541918,
+ "grad_norm": 27761.8828125,
+ "learning_rate": 9.357204752265341e-06,
+ "loss": 0.3974,
+ "step": 182450
+ },
+ {
+ "epoch": 0.9415904365368046,
+ "grad_norm": 23456.90234375,
+ "learning_rate": 9.335938075398842e-06,
+ "loss": 0.4072,
+ "step": 182500
+ },
+ {
+ "epoch": 0.9418484065194174,
+ "grad_norm": 21258.984375,
+ "learning_rate": 9.314693104221184e-06,
+ "loss": 0.3952,
+ "step": 182550
+ },
+ {
+ "epoch": 0.9421063765020302,
+ "grad_norm": 22634.01953125,
+ "learning_rate": 9.293469850072522e-06,
+ "loss": 0.402,
+ "step": 182600
+ },
+ {
+ "epoch": 0.9423643464846431,
+ "grad_norm": 22349.267578125,
+ "learning_rate": 9.272268324281407e-06,
+ "loss": 0.3974,
+ "step": 182650
+ },
+ {
+ "epoch": 0.9426223164672559,
+ "grad_norm": 23658.505859375,
+ "learning_rate": 9.251088538164837e-06,
+ "loss": 0.3979,
+ "step": 182700
+ },
+ {
+ "epoch": 0.9428802864498687,
+ "grad_norm": 26879.39453125,
+ "learning_rate": 9.229930503028129e-06,
+ "loss": 0.3965,
+ "step": 182750
+ },
+ {
+ "epoch": 0.9431382564324815,
+ "grad_norm": 25313.255859375,
+ "learning_rate": 9.208794230165058e-06,
+ "loss": 0.4049,
+ "step": 182800
+ },
+ {
+ "epoch": 0.9433962264150944,
+ "grad_norm": 26135.587890625,
+ "learning_rate": 9.187679730857756e-06,
+ "loss": 0.408,
+ "step": 182850
+ },
+ {
+ "epoch": 0.9436541963977072,
+ "grad_norm": 24064.087890625,
+ "learning_rate": 9.166587016376715e-06,
+ "loss": 0.4025,
+ "step": 182900
+ },
+ {
+ "epoch": 0.9439121663803199,
+ "grad_norm": 24475.30859375,
+ "learning_rate": 9.145516097980856e-06,
+ "loss": 0.4019,
+ "step": 182950
+ },
+ {
+ "epoch": 0.9441701363629328,
+ "grad_norm": 23691.06640625,
+ "learning_rate": 9.12446698691738e-06,
+ "loss": 0.4031,
+ "step": 183000
+ },
+ {
+ "epoch": 0.9444281063455456,
+ "grad_norm": 25653.37109375,
+ "learning_rate": 9.103439694421928e-06,
+ "loss": 0.4007,
+ "step": 183050
+ },
+ {
+ "epoch": 0.9446860763281585,
+ "grad_norm": 22718.71875,
+ "learning_rate": 9.08243423171845e-06,
+ "loss": 0.3996,
+ "step": 183100
+ },
+ {
+ "epoch": 0.9449440463107712,
+ "grad_norm": 23337.986328125,
+ "learning_rate": 9.061450610019262e-06,
+ "loss": 0.4043,
+ "step": 183150
+ },
+ {
+ "epoch": 0.9452020162933841,
+ "grad_norm": 27628.021484375,
+ "learning_rate": 9.040488840525001e-06,
+ "loss": 0.409,
+ "step": 183200
+ },
+ {
+ "epoch": 0.9454599862759969,
+ "grad_norm": 22894.26953125,
+ "learning_rate": 9.01954893442467e-06,
+ "loss": 0.4026,
+ "step": 183250
+ },
+ {
+ "epoch": 0.9457179562586098,
+ "grad_norm": 27624.564453125,
+ "learning_rate": 8.998630902895566e-06,
+ "loss": 0.4011,
+ "step": 183300
+ },
+ {
+ "epoch": 0.9459759262412226,
+ "grad_norm": 25944.05859375,
+ "learning_rate": 8.977734757103351e-06,
+ "loss": 0.3995,
+ "step": 183350
+ },
+ {
+ "epoch": 0.9462338962238354,
+ "grad_norm": 27243.31640625,
+ "learning_rate": 8.95686050820197e-06,
+ "loss": 0.3983,
+ "step": 183400
+ },
+ {
+ "epoch": 0.9464918662064482,
+ "grad_norm": 24556.611328125,
+ "learning_rate": 8.936008167333699e-06,
+ "loss": 0.4041,
+ "step": 183450
+ },
+ {
+ "epoch": 0.9467498361890611,
+ "grad_norm": 22205.880859375,
+ "learning_rate": 8.915177745629112e-06,
+ "loss": 0.3973,
+ "step": 183500
+ },
+ {
+ "epoch": 0.9470078061716739,
+ "grad_norm": 26829.6328125,
+ "learning_rate": 8.894369254207069e-06,
+ "loss": 0.4023,
+ "step": 183550
+ },
+ {
+ "epoch": 0.9472657761542866,
+ "grad_norm": 24388.59765625,
+ "learning_rate": 8.873582704174776e-06,
+ "loss": 0.397,
+ "step": 183600
+ },
+ {
+ "epoch": 0.9475237461368995,
+ "grad_norm": 25665.98828125,
+ "learning_rate": 8.852818106627647e-06,
+ "loss": 0.4055,
+ "step": 183650
+ },
+ {
+ "epoch": 0.9477817161195123,
+ "grad_norm": 24880.47265625,
+ "learning_rate": 8.83207547264946e-06,
+ "loss": 0.4016,
+ "step": 183700
+ },
+ {
+ "epoch": 0.9480396861021252,
+ "grad_norm": 26516.6953125,
+ "learning_rate": 8.81135481331221e-06,
+ "loss": 0.3992,
+ "step": 183750
+ },
+ {
+ "epoch": 0.9482976560847379,
+ "grad_norm": 22604.123046875,
+ "learning_rate": 8.790656139676179e-06,
+ "loss": 0.401,
+ "step": 183800
+ },
+ {
+ "epoch": 0.9485556260673508,
+ "grad_norm": 24668.94921875,
+ "learning_rate": 8.769979462789957e-06,
+ "loss": 0.3974,
+ "step": 183850
+ },
+ {
+ "epoch": 0.9488135960499636,
+ "grad_norm": 26522.896484375,
+ "learning_rate": 8.749324793690295e-06,
+ "loss": 0.4048,
+ "step": 183900
+ },
+ {
+ "epoch": 0.9490715660325765,
+ "grad_norm": 26786.48046875,
+ "learning_rate": 8.728692143402295e-06,
+ "loss": 0.4075,
+ "step": 183950
+ },
+ {
+ "epoch": 0.9493295360151893,
+ "grad_norm": 23683.54296875,
+ "learning_rate": 8.708081522939265e-06,
+ "loss": 0.3996,
+ "step": 184000
+ },
+ {
+ "epoch": 0.9495875059978021,
+ "grad_norm": 23064.400390625,
+ "learning_rate": 8.687492943302739e-06,
+ "loss": 0.4036,
+ "step": 184050
+ },
+ {
+ "epoch": 0.9498454759804149,
+ "grad_norm": 24142.4921875,
+ "learning_rate": 8.666926415482501e-06,
+ "loss": 0.4023,
+ "step": 184100
+ },
+ {
+ "epoch": 0.9501034459630278,
+ "grad_norm": 24012.076171875,
+ "learning_rate": 8.6463819504566e-06,
+ "loss": 0.4024,
+ "step": 184150
+ },
+ {
+ "epoch": 0.9503614159456406,
+ "grad_norm": 22214.41015625,
+ "learning_rate": 8.625859559191224e-06,
+ "loss": 0.4002,
+ "step": 184200
+ },
+ {
+ "epoch": 0.9506193859282533,
+ "grad_norm": 24664.162109375,
+ "learning_rate": 8.60535925264086e-06,
+ "loss": 0.4027,
+ "step": 184250
+ },
+ {
+ "epoch": 0.9508773559108662,
+ "grad_norm": 21136.900390625,
+ "learning_rate": 8.584881041748171e-06,
+ "loss": 0.3957,
+ "step": 184300
+ },
+ {
+ "epoch": 0.951135325893479,
+ "grad_norm": 22411.33984375,
+ "learning_rate": 8.56442493744401e-06,
+ "loss": 0.3977,
+ "step": 184350
+ },
+ {
+ "epoch": 0.9513932958760919,
+ "grad_norm": 23004.173828125,
+ "learning_rate": 8.54399095064749e-06,
+ "loss": 0.4014,
+ "step": 184400
+ },
+ {
+ "epoch": 0.9516512658587046,
+ "grad_norm": 23692.26171875,
+ "learning_rate": 8.523579092265827e-06,
+ "loss": 0.4013,
+ "step": 184450
+ },
+ {
+ "epoch": 0.9519092358413175,
+ "grad_norm": 25310.919921875,
+ "learning_rate": 8.503189373194509e-06,
+ "loss": 0.3961,
+ "step": 184500
+ },
+ {
+ "epoch": 0.9521672058239303,
+ "grad_norm": 25963.943359375,
+ "learning_rate": 8.482821804317171e-06,
+ "loss": 0.4049,
+ "step": 184550
+ },
+ {
+ "epoch": 0.9524251758065432,
+ "grad_norm": 24282.115234375,
+ "learning_rate": 8.46247639650562e-06,
+ "loss": 0.4008,
+ "step": 184600
+ },
+ {
+ "epoch": 0.952683145789156,
+ "grad_norm": 24703.26953125,
+ "learning_rate": 8.442153160619837e-06,
+ "loss": 0.4063,
+ "step": 184650
+ },
+ {
+ "epoch": 0.9529411157717688,
+ "grad_norm": 23616.09375,
+ "learning_rate": 8.421852107507966e-06,
+ "loss": 0.3974,
+ "step": 184700
+ },
+ {
+ "epoch": 0.9531990857543816,
+ "grad_norm": 25447.408203125,
+ "learning_rate": 8.40157324800634e-06,
+ "loss": 0.4066,
+ "step": 184750
+ },
+ {
+ "epoch": 0.9534570557369945,
+ "grad_norm": 25534.3984375,
+ "learning_rate": 8.381316592939403e-06,
+ "loss": 0.4027,
+ "step": 184800
+ },
+ {
+ "epoch": 0.9537150257196073,
+ "grad_norm": 24251.138671875,
+ "learning_rate": 8.361082153119777e-06,
+ "loss": 0.3958,
+ "step": 184850
+ },
+ {
+ "epoch": 0.95397299570222,
+ "grad_norm": 26980.046875,
+ "learning_rate": 8.3408699393482e-06,
+ "loss": 0.4058,
+ "step": 184900
+ },
+ {
+ "epoch": 0.9542309656848329,
+ "grad_norm": 26143.732421875,
+ "learning_rate": 8.320679962413574e-06,
+ "loss": 0.4006,
+ "step": 184950
+ },
+ {
+ "epoch": 0.9544889356674457,
+ "grad_norm": 24566.15234375,
+ "learning_rate": 8.300512233092893e-06,
+ "loss": 0.405,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9544889356674457,
+ "eval_loss": 0.3880694806575775,
+ "eval_runtime": 3197.8794,
+ "eval_samples_per_second": 969.743,
+ "eval_steps_per_second": 1.894,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9547469056500586,
+ "grad_norm": 22463.359375,
+ "learning_rate": 8.280366762151349e-06,
+ "loss": 0.4035,
+ "step": 185050
+ },
+ {
+ "epoch": 0.9550048756326713,
+ "grad_norm": 23964.845703125,
+ "learning_rate": 8.260243560342146e-06,
+ "loss": 0.399,
+ "step": 185100
+ },
+ {
+ "epoch": 0.9552628456152842,
+ "grad_norm": 22267.978515625,
+ "learning_rate": 8.2401426384067e-06,
+ "loss": 0.4065,
+ "step": 185150
+ },
+ {
+ "epoch": 0.955520815597897,
+ "grad_norm": 23959.732421875,
+ "learning_rate": 8.220064007074485e-06,
+ "loss": 0.3988,
+ "step": 185200
+ },
+ {
+ "epoch": 0.9557787855805099,
+ "grad_norm": 22042.95703125,
+ "learning_rate": 8.200007677063066e-06,
+ "loss": 0.4005,
+ "step": 185250
+ },
+ {
+ "epoch": 0.9560367555631226,
+ "grad_norm": 23760.798828125,
+ "learning_rate": 8.17997365907816e-06,
+ "loss": 0.4043,
+ "step": 185300
+ },
+ {
+ "epoch": 0.9562947255457355,
+ "grad_norm": 23235.8828125,
+ "learning_rate": 8.1599619638135e-06,
+ "loss": 0.3999,
+ "step": 185350
+ },
+ {
+ "epoch": 0.9565526955283483,
+ "grad_norm": 22637.701171875,
+ "learning_rate": 8.139972601950967e-06,
+ "loss": 0.4004,
+ "step": 185400
+ },
+ {
+ "epoch": 0.9568106655109612,
+ "grad_norm": 28806.810546875,
+ "learning_rate": 8.120005584160489e-06,
+ "loss": 0.4022,
+ "step": 185450
+ },
+ {
+ "epoch": 0.957068635493574,
+ "grad_norm": 22143.8203125,
+ "learning_rate": 8.100060921100067e-06,
+ "loss": 0.3977,
+ "step": 185500
+ },
+ {
+ "epoch": 0.9573266054761868,
+ "grad_norm": 22921.810546875,
+ "learning_rate": 8.080138623415783e-06,
+ "loss": 0.4,
+ "step": 185550
+ },
+ {
+ "epoch": 0.9575845754587996,
+ "grad_norm": 25425.640625,
+ "learning_rate": 8.060238701741762e-06,
+ "loss": 0.4021,
+ "step": 185600
+ },
+ {
+ "epoch": 0.9578425454414125,
+ "grad_norm": 27279.6796875,
+ "learning_rate": 8.040361166700216e-06,
+ "loss": 0.4064,
+ "step": 185650
+ },
+ {
+ "epoch": 0.9581005154240253,
+ "grad_norm": 25144.322265625,
+ "learning_rate": 8.020506028901376e-06,
+ "loss": 0.4031,
+ "step": 185700
+ },
+ {
+ "epoch": 0.958358485406638,
+ "grad_norm": 21046.607421875,
+ "learning_rate": 8.000673298943534e-06,
+ "loss": 0.4041,
+ "step": 185750
+ },
+ {
+ "epoch": 0.9586164553892509,
+ "grad_norm": 23166.087890625,
+ "learning_rate": 7.980862987413018e-06,
+ "loss": 0.3996,
+ "step": 185800
+ },
+ {
+ "epoch": 0.9588744253718637,
+ "grad_norm": 23506.693359375,
+ "learning_rate": 7.961075104884186e-06,
+ "loss": 0.3973,
+ "step": 185850
+ },
+ {
+ "epoch": 0.9591323953544766,
+ "grad_norm": 25975.408203125,
+ "learning_rate": 7.94130966191941e-06,
+ "loss": 0.4048,
+ "step": 185900
+ },
+ {
+ "epoch": 0.9593903653370893,
+ "grad_norm": 23704.638671875,
+ "learning_rate": 7.921566669069147e-06,
+ "loss": 0.4045,
+ "step": 185950
+ },
+ {
+ "epoch": 0.9596483353197022,
+ "grad_norm": 27402.2421875,
+ "learning_rate": 7.901846136871766e-06,
+ "loss": 0.4007,
+ "step": 186000
+ },
+ {
+ "epoch": 0.959906305302315,
+ "grad_norm": 23186.658203125,
+ "learning_rate": 7.882148075853752e-06,
+ "loss": 0.4072,
+ "step": 186050
+ },
+ {
+ "epoch": 0.9601642752849279,
+ "grad_norm": 24789.619140625,
+ "learning_rate": 7.862472496529528e-06,
+ "loss": 0.4056,
+ "step": 186100
+ },
+ {
+ "epoch": 0.9604222452675407,
+ "grad_norm": 23849.71875,
+ "learning_rate": 7.842819409401524e-06,
+ "loss": 0.4067,
+ "step": 186150
+ },
+ {
+ "epoch": 0.9606802152501535,
+ "grad_norm": 24820.765625,
+ "learning_rate": 7.823188824960221e-06,
+ "loss": 0.4071,
+ "step": 186200
+ },
+ {
+ "epoch": 0.9609381852327663,
+ "grad_norm": 23276.568359375,
+ "learning_rate": 7.803580753683992e-06,
+ "loss": 0.3989,
+ "step": 186250
+ },
+ {
+ "epoch": 0.9611961552153792,
+ "grad_norm": 21064.8984375,
+ "learning_rate": 7.783995206039279e-06,
+ "loss": 0.3994,
+ "step": 186300
+ },
+ {
+ "epoch": 0.961454125197992,
+ "grad_norm": 27310.30078125,
+ "learning_rate": 7.764432192480464e-06,
+ "loss": 0.4015,
+ "step": 186350
+ },
+ {
+ "epoch": 0.9617120951806047,
+ "grad_norm": 24786.1796875,
+ "learning_rate": 7.744891723449888e-06,
+ "loss": 0.4042,
+ "step": 186400
+ },
+ {
+ "epoch": 0.9619700651632176,
+ "grad_norm": 22362.47265625,
+ "learning_rate": 7.725373809377911e-06,
+ "loss": 0.3991,
+ "step": 186450
+ },
+ {
+ "epoch": 0.9622280351458304,
+ "grad_norm": 23751.4296875,
+ "learning_rate": 7.705878460682775e-06,
+ "loss": 0.3988,
+ "step": 186500
+ },
+ {
+ "epoch": 0.9624860051284433,
+ "grad_norm": 22956.935546875,
+ "learning_rate": 7.686405687770748e-06,
+ "loss": 0.4049,
+ "step": 186550
+ },
+ {
+ "epoch": 0.962743975111056,
+ "grad_norm": 25276.861328125,
+ "learning_rate": 7.666955501036006e-06,
+ "loss": 0.4005,
+ "step": 186600
+ },
+ {
+ "epoch": 0.9630019450936689,
+ "grad_norm": 22390.625,
+ "learning_rate": 7.647527910860691e-06,
+ "loss": 0.4008,
+ "step": 186650
+ },
+ {
+ "epoch": 0.9632599150762817,
+ "grad_norm": 28946.125,
+ "learning_rate": 7.628122927614856e-06,
+ "loss": 0.3987,
+ "step": 186700
+ },
+ {
+ "epoch": 0.9635178850588946,
+ "grad_norm": 23663.3125,
+ "learning_rate": 7.608740561656541e-06,
+ "loss": 0.4006,
+ "step": 186750
+ },
+ {
+ "epoch": 0.9637758550415074,
+ "grad_norm": 21705.16015625,
+ "learning_rate": 7.589380823331632e-06,
+ "loss": 0.4023,
+ "step": 186800
+ },
+ {
+ "epoch": 0.9640338250241202,
+ "grad_norm": 25353.228515625,
+ "learning_rate": 7.570043722974019e-06,
+ "loss": 0.4006,
+ "step": 186850
+ },
+ {
+ "epoch": 0.964291795006733,
+ "grad_norm": 26046.412109375,
+ "learning_rate": 7.55072927090546e-06,
+ "loss": 0.3931,
+ "step": 186900
+ },
+ {
+ "epoch": 0.9645497649893459,
+ "grad_norm": 25989.2578125,
+ "learning_rate": 7.531437477435621e-06,
+ "loss": 0.3989,
+ "step": 186950
+ },
+ {
+ "epoch": 0.9648077349719587,
+ "grad_norm": 22714.423828125,
+ "learning_rate": 7.51216835286212e-06,
+ "loss": 0.4018,
+ "step": 187000
+ },
+ {
+ "epoch": 0.9650657049545714,
+ "grad_norm": 26353.42578125,
+ "learning_rate": 7.492921907470407e-06,
+ "loss": 0.4056,
+ "step": 187050
+ },
+ {
+ "epoch": 0.9653236749371843,
+ "grad_norm": 23085.212890625,
+ "learning_rate": 7.4736981515338864e-06,
+ "loss": 0.3995,
+ "step": 187100
+ },
+ {
+ "epoch": 0.9655816449197971,
+ "grad_norm": 23125.970703125,
+ "learning_rate": 7.454497095313817e-06,
+ "loss": 0.4069,
+ "step": 187150
+ },
+ {
+ "epoch": 0.96583961490241,
+ "grad_norm": 23488.2265625,
+ "learning_rate": 7.435318749059356e-06,
+ "loss": 0.4039,
+ "step": 187200
+ },
+ {
+ "epoch": 0.9660975848850227,
+ "grad_norm": 22577.46875,
+ "learning_rate": 7.4161631230075305e-06,
+ "loss": 0.4051,
+ "step": 187250
+ },
+ {
+ "epoch": 0.9663555548676356,
+ "grad_norm": 22637.890625,
+ "learning_rate": 7.397030227383228e-06,
+ "loss": 0.3986,
+ "step": 187300
+ },
+ {
+ "epoch": 0.9666135248502484,
+ "grad_norm": 26084.412109375,
+ "learning_rate": 7.377920072399247e-06,
+ "loss": 0.398,
+ "step": 187350
+ },
+ {
+ "epoch": 0.9668714948328613,
+ "grad_norm": 25263.6328125,
+ "learning_rate": 7.3588326682562e-06,
+ "loss": 0.4035,
+ "step": 187400
+ },
+ {
+ "epoch": 0.9671294648154741,
+ "grad_norm": 22348.236328125,
+ "learning_rate": 7.339768025142573e-06,
+ "loss": 0.4003,
+ "step": 187450
+ },
+ {
+ "epoch": 0.9673874347980869,
+ "grad_norm": 23006.091796875,
+ "learning_rate": 7.320726153234714e-06,
+ "loss": 0.399,
+ "step": 187500
+ },
+ {
+ "epoch": 0.9676454047806997,
+ "grad_norm": 24137.44921875,
+ "learning_rate": 7.301707062696794e-06,
+ "loss": 0.3999,
+ "step": 187550
+ },
+ {
+ "epoch": 0.9679033747633126,
+ "grad_norm": 26101.837890625,
+ "learning_rate": 7.282710763680828e-06,
+ "loss": 0.4007,
+ "step": 187600
+ },
+ {
+ "epoch": 0.9681613447459254,
+ "grad_norm": 21417.814453125,
+ "learning_rate": 7.263737266326709e-06,
+ "loss": 0.3994,
+ "step": 187650
+ },
+ {
+ "epoch": 0.9684193147285381,
+ "grad_norm": 25831.45703125,
+ "learning_rate": 7.244786580762075e-06,
+ "loss": 0.3925,
+ "step": 187700
+ },
+ {
+ "epoch": 0.968677284711151,
+ "grad_norm": 24546.84765625,
+ "learning_rate": 7.225858717102474e-06,
+ "loss": 0.4004,
+ "step": 187750
+ },
+ {
+ "epoch": 0.9689352546937638,
+ "grad_norm": 23773.09765625,
+ "learning_rate": 7.206953685451212e-06,
+ "loss": 0.4041,
+ "step": 187800
+ },
+ {
+ "epoch": 0.9691932246763767,
+ "grad_norm": 23538.923828125,
+ "learning_rate": 7.188071495899423e-06,
+ "loss": 0.3971,
+ "step": 187850
+ },
+ {
+ "epoch": 0.9694511946589894,
+ "grad_norm": 24968.310546875,
+ "learning_rate": 7.169212158526084e-06,
+ "loss": 0.4047,
+ "step": 187900
+ },
+ {
+ "epoch": 0.9697091646416023,
+ "grad_norm": 24379.23828125,
+ "learning_rate": 7.150375683397908e-06,
+ "loss": 0.3983,
+ "step": 187950
+ },
+ {
+ "epoch": 0.9699671346242151,
+ "grad_norm": 25501.638671875,
+ "learning_rate": 7.131562080569465e-06,
+ "loss": 0.4024,
+ "step": 188000
+ },
+ {
+ "epoch": 0.970225104606828,
+ "grad_norm": 24917.73046875,
+ "learning_rate": 7.112771360083087e-06,
+ "loss": 0.3998,
+ "step": 188050
+ },
+ {
+ "epoch": 0.9704830745894407,
+ "grad_norm": 24725.638671875,
+ "learning_rate": 7.094003531968896e-06,
+ "loss": 0.3964,
+ "step": 188100
+ },
+ {
+ "epoch": 0.9707410445720536,
+ "grad_norm": 23913.5703125,
+ "learning_rate": 7.075258606244789e-06,
+ "loss": 0.3987,
+ "step": 188150
+ },
+ {
+ "epoch": 0.9709990145546664,
+ "grad_norm": 25010.09375,
+ "learning_rate": 7.05653659291644e-06,
+ "loss": 0.4021,
+ "step": 188200
+ },
+ {
+ "epoch": 0.9712569845372793,
+ "grad_norm": 25357.556640625,
+ "learning_rate": 7.037837501977318e-06,
+ "loss": 0.4007,
+ "step": 188250
+ },
+ {
+ "epoch": 0.9715149545198921,
+ "grad_norm": 24599.890625,
+ "learning_rate": 7.019161343408625e-06,
+ "loss": 0.3962,
+ "step": 188300
+ },
+ {
+ "epoch": 0.9717729245025049,
+ "grad_norm": 25866.2734375,
+ "learning_rate": 7.000508127179328e-06,
+ "loss": 0.3983,
+ "step": 188350
+ },
+ {
+ "epoch": 0.9720308944851177,
+ "grad_norm": 22591.40625,
+ "learning_rate": 6.981877863246161e-06,
+ "loss": 0.3971,
+ "step": 188400
+ },
+ {
+ "epoch": 0.9722888644677306,
+ "grad_norm": 20752.091796875,
+ "learning_rate": 6.963270561553586e-06,
+ "loss": 0.3946,
+ "step": 188450
+ },
+ {
+ "epoch": 0.9725468344503434,
+ "grad_norm": 22927.109375,
+ "learning_rate": 6.94468623203382e-06,
+ "loss": 0.4036,
+ "step": 188500
+ },
+ {
+ "epoch": 0.9728048044329561,
+ "grad_norm": 27096.041015625,
+ "learning_rate": 6.92612488460685e-06,
+ "loss": 0.3982,
+ "step": 188550
+ },
+ {
+ "epoch": 0.973062774415569,
+ "grad_norm": 24426.93359375,
+ "learning_rate": 6.907586529180321e-06,
+ "loss": 0.4054,
+ "step": 188600
+ },
+ {
+ "epoch": 0.9733207443981818,
+ "grad_norm": 25097.658203125,
+ "learning_rate": 6.889071175649669e-06,
+ "loss": 0.4015,
+ "step": 188650
+ },
+ {
+ "epoch": 0.9735787143807947,
+ "grad_norm": 24646.548828125,
+ "learning_rate": 6.870578833898033e-06,
+ "loss": 0.3977,
+ "step": 188700
+ },
+ {
+ "epoch": 0.9738366843634074,
+ "grad_norm": 23465.357421875,
+ "learning_rate": 6.852109513796257e-06,
+ "loss": 0.396,
+ "step": 188750
+ },
+ {
+ "epoch": 0.9740946543460203,
+ "grad_norm": 22382.603515625,
+ "learning_rate": 6.83366322520293e-06,
+ "loss": 0.4018,
+ "step": 188800
+ },
+ {
+ "epoch": 0.9743526243286331,
+ "grad_norm": 24666.61328125,
+ "learning_rate": 6.815239977964283e-06,
+ "loss": 0.4046,
+ "step": 188850
+ },
+ {
+ "epoch": 0.974610594311246,
+ "grad_norm": 25308.685546875,
+ "learning_rate": 6.796839781914321e-06,
+ "loss": 0.3998,
+ "step": 188900
+ },
+ {
+ "epoch": 0.9748685642938588,
+ "grad_norm": 24856.64453125,
+ "learning_rate": 6.778462646874706e-06,
+ "loss": 0.4014,
+ "step": 188950
+ },
+ {
+ "epoch": 0.9751265342764716,
+ "grad_norm": 27452.50390625,
+ "learning_rate": 6.760108582654795e-06,
+ "loss": 0.4008,
+ "step": 189000
+ },
+ {
+ "epoch": 0.9753845042590844,
+ "grad_norm": 25027.416015625,
+ "learning_rate": 6.741777599051629e-06,
+ "loss": 0.4006,
+ "step": 189050
+ },
+ {
+ "epoch": 0.9756424742416973,
+ "grad_norm": 24687.740234375,
+ "learning_rate": 6.723469705849927e-06,
+ "loss": 0.4056,
+ "step": 189100
+ },
+ {
+ "epoch": 0.9759004442243101,
+ "grad_norm": 24812.55078125,
+ "learning_rate": 6.705184912822105e-06,
+ "loss": 0.4043,
+ "step": 189150
+ },
+ {
+ "epoch": 0.9761584142069228,
+ "grad_norm": 25776.005859375,
+ "learning_rate": 6.686923229728214e-06,
+ "loss": 0.4052,
+ "step": 189200
+ },
+ {
+ "epoch": 0.9764163841895357,
+ "grad_norm": 24319.34765625,
+ "learning_rate": 6.668684666316005e-06,
+ "loss": 0.4014,
+ "step": 189250
+ },
+ {
+ "epoch": 0.9766743541721485,
+ "grad_norm": 28024.419921875,
+ "learning_rate": 6.650469232320839e-06,
+ "loss": 0.3991,
+ "step": 189300
+ },
+ {
+ "epoch": 0.9769323241547614,
+ "grad_norm": 25074.068359375,
+ "learning_rate": 6.6322769374658085e-06,
+ "loss": 0.4034,
+ "step": 189350
+ },
+ {
+ "epoch": 0.9771902941373741,
+ "grad_norm": 21126.572265625,
+ "learning_rate": 6.61410779146156e-06,
+ "loss": 0.3998,
+ "step": 189400
+ },
+ {
+ "epoch": 0.977448264119987,
+ "grad_norm": 25041.337890625,
+ "learning_rate": 6.595961804006467e-06,
+ "loss": 0.4012,
+ "step": 189450
+ },
+ {
+ "epoch": 0.9777062341025998,
+ "grad_norm": 25474.263671875,
+ "learning_rate": 6.577838984786489e-06,
+ "loss": 0.3991,
+ "step": 189500
+ },
+ {
+ "epoch": 0.9779642040852127,
+ "grad_norm": 22192.98828125,
+ "learning_rate": 6.55973934347523e-06,
+ "loss": 0.3965,
+ "step": 189550
+ },
+ {
+ "epoch": 0.9782221740678255,
+ "grad_norm": 24587.9453125,
+ "learning_rate": 6.5416628897339625e-06,
+ "loss": 0.4008,
+ "step": 189600
+ },
+ {
+ "epoch": 0.9784801440504383,
+ "grad_norm": 23246.314453125,
+ "learning_rate": 6.523609633211497e-06,
+ "loss": 0.4036,
+ "step": 189650
+ },
+ {
+ "epoch": 0.9787381140330511,
+ "grad_norm": 24233.033203125,
+ "learning_rate": 6.505579583544353e-06,
+ "loss": 0.4002,
+ "step": 189700
+ },
+ {
+ "epoch": 0.978996084015664,
+ "grad_norm": 24149.6953125,
+ "learning_rate": 6.487572750356602e-06,
+ "loss": 0.4043,
+ "step": 189750
+ },
+ {
+ "epoch": 0.9792540539982768,
+ "grad_norm": 25376.3046875,
+ "learning_rate": 6.469589143259952e-06,
+ "loss": 0.3997,
+ "step": 189800
+ },
+ {
+ "epoch": 0.9795120239808895,
+ "grad_norm": 25878.90625,
+ "learning_rate": 6.451628771853696e-06,
+ "loss": 0.3936,
+ "step": 189850
+ },
+ {
+ "epoch": 0.9797699939635024,
+ "grad_norm": 24123.169921875,
+ "learning_rate": 6.433691645724743e-06,
+ "loss": 0.3976,
+ "step": 189900
+ },
+ {
+ "epoch": 0.9800279639461152,
+ "grad_norm": 23894.5625,
+ "learning_rate": 6.4157777744475626e-06,
+ "loss": 0.4025,
+ "step": 189950
+ },
+ {
+ "epoch": 0.9802859339287281,
+ "grad_norm": 27271.9609375,
+ "learning_rate": 6.3978871675842544e-06,
+ "loss": 0.4007,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9802859339287281,
+ "eval_loss": 0.3872862458229065,
+ "eval_runtime": 3184.1416,
+ "eval_samples_per_second": 973.927,
+ "eval_steps_per_second": 1.902,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9805439039113408,
+ "grad_norm": 25592.9296875,
+ "learning_rate": 6.380019834684475e-06,
+ "loss": 0.4041,
+ "step": 190050
+ },
+ {
+ "epoch": 0.9808018738939537,
+ "grad_norm": 22425.51953125,
+ "learning_rate": 6.362175785285457e-06,
+ "loss": 0.4028,
+ "step": 190100
+ },
+ {
+ "epoch": 0.9810598438765665,
+ "grad_norm": 25178.28125,
+ "learning_rate": 6.344355028912008e-06,
+ "loss": 0.3972,
+ "step": 190150
+ },
+ {
+ "epoch": 0.9813178138591794,
+ "grad_norm": 25157.537109375,
+ "learning_rate": 6.326557575076486e-06,
+ "loss": 0.3989,
+ "step": 190200
+ },
+ {
+ "epoch": 0.9815757838417921,
+ "grad_norm": 23774.67578125,
+ "learning_rate": 6.3087834332788695e-06,
+ "loss": 0.4057,
+ "step": 190250
+ },
+ {
+ "epoch": 0.981833753824405,
+ "grad_norm": 25307.736328125,
+ "learning_rate": 6.2910326130066035e-06,
+ "loss": 0.3946,
+ "step": 190300
+ },
+ {
+ "epoch": 0.9820917238070178,
+ "grad_norm": 28657.8125,
+ "learning_rate": 6.273305123734769e-06,
+ "loss": 0.4006,
+ "step": 190350
+ },
+ {
+ "epoch": 0.9823496937896307,
+ "grad_norm": 24404.603515625,
+ "learning_rate": 6.255600974925935e-06,
+ "loss": 0.3998,
+ "step": 190400
+ },
+ {
+ "epoch": 0.9826076637722435,
+ "grad_norm": 22460.1640625,
+ "learning_rate": 6.237920176030232e-06,
+ "loss": 0.4039,
+ "step": 190450
+ },
+ {
+ "epoch": 0.9828656337548562,
+ "grad_norm": 27335.625,
+ "learning_rate": 6.220262736485355e-06,
+ "loss": 0.3937,
+ "step": 190500
+ },
+ {
+ "epoch": 0.9831236037374691,
+ "grad_norm": 27996.9765625,
+ "learning_rate": 6.202628665716464e-06,
+ "loss": 0.4025,
+ "step": 190550
+ },
+ {
+ "epoch": 0.983381573720082,
+ "grad_norm": 23532.66796875,
+ "learning_rate": 6.18501797313632e-06,
+ "loss": 0.4007,
+ "step": 190600
+ },
+ {
+ "epoch": 0.9836395437026948,
+ "grad_norm": 27360.333984375,
+ "learning_rate": 6.167430668145146e-06,
+ "loss": 0.3994,
+ "step": 190650
+ },
+ {
+ "epoch": 0.9838975136853075,
+ "grad_norm": 23754.23828125,
+ "learning_rate": 6.149866760130718e-06,
+ "loss": 0.4043,
+ "step": 190700
+ },
+ {
+ "epoch": 0.9841554836679204,
+ "grad_norm": 24313.943359375,
+ "learning_rate": 6.1323262584683075e-06,
+ "loss": 0.4039,
+ "step": 190750
+ },
+ {
+ "epoch": 0.9844134536505332,
+ "grad_norm": 22932.11328125,
+ "learning_rate": 6.114809172520686e-06,
+ "loss": 0.3977,
+ "step": 190800
+ },
+ {
+ "epoch": 0.9846714236331461,
+ "grad_norm": 27614.103515625,
+ "learning_rate": 6.097315511638135e-06,
+ "loss": 0.405,
+ "step": 190850
+ },
+ {
+ "epoch": 0.9849293936157588,
+ "grad_norm": 21648.470703125,
+ "learning_rate": 6.079845285158447e-06,
+ "loss": 0.403,
+ "step": 190900
+ },
+ {
+ "epoch": 0.9851873635983717,
+ "grad_norm": 25720.76953125,
+ "learning_rate": 6.0623985024068854e-06,
+ "loss": 0.4069,
+ "step": 190950
+ },
+ {
+ "epoch": 0.9854453335809845,
+ "grad_norm": 22051.30078125,
+ "learning_rate": 6.044975172696199e-06,
+ "loss": 0.4062,
+ "step": 191000
+ },
+ {
+ "epoch": 0.9857033035635974,
+ "grad_norm": 27862.138671875,
+ "learning_rate": 6.027575305326621e-06,
+ "loss": 0.4029,
+ "step": 191050
+ },
+ {
+ "epoch": 0.9859612735462102,
+ "grad_norm": 24624.951171875,
+ "learning_rate": 6.010198909585862e-06,
+ "loss": 0.3995,
+ "step": 191100
+ },
+ {
+ "epoch": 0.986219243528823,
+ "grad_norm": 23278.45703125,
+ "learning_rate": 5.992845994749136e-06,
+ "loss": 0.3981,
+ "step": 191150
+ },
+ {
+ "epoch": 0.9864772135114358,
+ "grad_norm": 27549.26953125,
+ "learning_rate": 5.975516570079048e-06,
+ "loss": 0.3999,
+ "step": 191200
+ },
+ {
+ "epoch": 0.9867351834940487,
+ "grad_norm": 24570.40625,
+ "learning_rate": 5.95821064482574e-06,
+ "loss": 0.4052,
+ "step": 191250
+ },
+ {
+ "epoch": 0.9869931534766615,
+ "grad_norm": 23672.029296875,
+ "learning_rate": 5.9409282282267665e-06,
+ "loss": 0.4045,
+ "step": 191300
+ },
+ {
+ "epoch": 0.9872511234592742,
+ "grad_norm": 22627.697265625,
+ "learning_rate": 5.923669329507148e-06,
+ "loss": 0.4017,
+ "step": 191350
+ },
+ {
+ "epoch": 0.9875090934418871,
+ "grad_norm": 22583.0390625,
+ "learning_rate": 5.906433957879365e-06,
+ "loss": 0.399,
+ "step": 191400
+ },
+ {
+ "epoch": 0.9877670634244999,
+ "grad_norm": 22665.984375,
+ "learning_rate": 5.889222122543298e-06,
+ "loss": 0.3989,
+ "step": 191450
+ },
+ {
+ "epoch": 0.9880250334071128,
+ "grad_norm": 25125.6640625,
+ "learning_rate": 5.872033832686319e-06,
+ "loss": 0.4001,
+ "step": 191500
+ },
+ {
+ "epoch": 0.9882830033897255,
+ "grad_norm": 24863.34375,
+ "learning_rate": 5.8548690974831845e-06,
+ "loss": 0.3991,
+ "step": 191550
+ },
+ {
+ "epoch": 0.9885409733723384,
+ "grad_norm": 23538.44921875,
+ "learning_rate": 5.837727926096109e-06,
+ "loss": 0.3979,
+ "step": 191600
+ },
+ {
+ "epoch": 0.9887989433549512,
+ "grad_norm": 23396.3203125,
+ "learning_rate": 5.820610327674708e-06,
+ "loss": 0.4049,
+ "step": 191650
+ },
+ {
+ "epoch": 0.9890569133375641,
+ "grad_norm": 22553.01171875,
+ "learning_rate": 5.803516311356044e-06,
+ "loss": 0.3983,
+ "step": 191700
+ },
+ {
+ "epoch": 0.9893148833201769,
+ "grad_norm": 25163.04296875,
+ "learning_rate": 5.786445886264541e-06,
+ "loss": 0.3969,
+ "step": 191750
+ },
+ {
+ "epoch": 0.9895728533027897,
+ "grad_norm": 22826.181640625,
+ "learning_rate": 5.769399061512093e-06,
+ "loss": 0.4016,
+ "step": 191800
+ },
+ {
+ "epoch": 0.9898308232854025,
+ "grad_norm": 22302.7265625,
+ "learning_rate": 5.752375846197944e-06,
+ "loss": 0.3988,
+ "step": 191850
+ },
+ {
+ "epoch": 0.9900887932680154,
+ "grad_norm": 20985.990234375,
+ "learning_rate": 5.735376249408753e-06,
+ "loss": 0.3952,
+ "step": 191900
+ },
+ {
+ "epoch": 0.9903467632506282,
+ "grad_norm": 23513.19921875,
+ "learning_rate": 5.718400280218611e-06,
+ "loss": 0.4052,
+ "step": 191950
+ },
+ {
+ "epoch": 0.9906047332332409,
+ "grad_norm": 23184.818359375,
+ "learning_rate": 5.7014479476889145e-06,
+ "loss": 0.399,
+ "step": 192000
+ },
+ {
+ "epoch": 0.9908627032158538,
+ "grad_norm": 23472.9453125,
+ "learning_rate": 5.684519260868521e-06,
+ "loss": 0.3946,
+ "step": 192050
+ },
+ {
+ "epoch": 0.9911206731984666,
+ "grad_norm": 26255.388671875,
+ "learning_rate": 5.667614228793622e-06,
+ "loss": 0.3964,
+ "step": 192100
+ },
+ {
+ "epoch": 0.9913786431810795,
+ "grad_norm": 23894.54296875,
+ "learning_rate": 5.650732860487806e-06,
+ "loss": 0.3928,
+ "step": 192150
+ },
+ {
+ "epoch": 0.9916366131636922,
+ "grad_norm": 24135.478515625,
+ "learning_rate": 5.633875164962016e-06,
+ "loss": 0.4019,
+ "step": 192200
+ },
+ {
+ "epoch": 0.9918945831463051,
+ "grad_norm": 26928.08984375,
+ "learning_rate": 5.617041151214553e-06,
+ "loss": 0.3958,
+ "step": 192250
+ },
+ {
+ "epoch": 0.9921525531289179,
+ "grad_norm": 22469.884765625,
+ "learning_rate": 5.600230828231107e-06,
+ "loss": 0.4031,
+ "step": 192300
+ },
+ {
+ "epoch": 0.9924105231115308,
+ "grad_norm": 23694.59765625,
+ "learning_rate": 5.583444204984695e-06,
+ "loss": 0.3926,
+ "step": 192350
+ },
+ {
+ "epoch": 0.9926684930941435,
+ "grad_norm": 23482.986328125,
+ "learning_rate": 5.566681290435688e-06,
+ "loss": 0.4112,
+ "step": 192400
+ },
+ {
+ "epoch": 0.9929264630767564,
+ "grad_norm": 22524.994140625,
+ "learning_rate": 5.549942093531812e-06,
+ "loss": 0.3981,
+ "step": 192450
+ },
+ {
+ "epoch": 0.9931844330593692,
+ "grad_norm": 27258.35546875,
+ "learning_rate": 5.5332266232081155e-06,
+ "loss": 0.4024,
+ "step": 192500
+ },
+ {
+ "epoch": 0.9934424030419821,
+ "grad_norm": 19928.40625,
+ "learning_rate": 5.516534888386992e-06,
+ "loss": 0.4028,
+ "step": 192550
+ },
+ {
+ "epoch": 0.9937003730245949,
+ "grad_norm": 21809.205078125,
+ "learning_rate": 5.499866897978189e-06,
+ "loss": 0.3996,
+ "step": 192600
+ },
+ {
+ "epoch": 0.9939583430072076,
+ "grad_norm": 22132.6171875,
+ "learning_rate": 5.483222660878729e-06,
+ "loss": 0.4012,
+ "step": 192650
+ },
+ {
+ "epoch": 0.9942163129898205,
+ "grad_norm": 25306.728515625,
+ "learning_rate": 5.466602185973002e-06,
+ "loss": 0.3987,
+ "step": 192700
+ },
+ {
+ "epoch": 0.9944742829724333,
+ "grad_norm": 29266.78515625,
+ "learning_rate": 5.4500054821326865e-06,
+ "loss": 0.4028,
+ "step": 192750
+ },
+ {
+ "epoch": 0.9947322529550462,
+ "grad_norm": 23506.931640625,
+ "learning_rate": 5.433432558216778e-06,
+ "loss": 0.3948,
+ "step": 192800
+ },
+ {
+ "epoch": 0.9949902229376589,
+ "grad_norm": 22564.177734375,
+ "learning_rate": 5.416883423071606e-06,
+ "loss": 0.4015,
+ "step": 192850
+ },
+ {
+ "epoch": 0.9952481929202718,
+ "grad_norm": 24564.380859375,
+ "learning_rate": 5.400358085530738e-06,
+ "loss": 0.4046,
+ "step": 192900
+ },
+ {
+ "epoch": 0.9955061629028846,
+ "grad_norm": 24793.91796875,
+ "learning_rate": 5.383856554415117e-06,
+ "loss": 0.4003,
+ "step": 192950
+ },
+ {
+ "epoch": 0.9957641328854975,
+ "grad_norm": 23798.228515625,
+ "learning_rate": 5.367378838532927e-06,
+ "loss": 0.3982,
+ "step": 193000
+ },
+ {
+ "epoch": 0.9960221028681102,
+ "grad_norm": 23164.642578125,
+ "learning_rate": 5.350924946679653e-06,
+ "loss": 0.3977,
+ "step": 193050
+ },
+ {
+ "epoch": 0.9962800728507231,
+ "grad_norm": 25646.29296875,
+ "learning_rate": 5.334494887638058e-06,
+ "loss": 0.3992,
+ "step": 193100
+ },
+ {
+ "epoch": 0.9965380428333359,
+ "grad_norm": 24146.2421875,
+ "learning_rate": 5.318088670178189e-06,
+ "loss": 0.4037,
+ "step": 193150
+ },
+ {
+ "epoch": 0.9967960128159488,
+ "grad_norm": 22594.72265625,
+ "learning_rate": 5.301706303057386e-06,
+ "loss": 0.4004,
+ "step": 193200
+ },
+ {
+ "epoch": 0.9970539827985616,
+ "grad_norm": 23395.515625,
+ "learning_rate": 5.285347795020224e-06,
+ "loss": 0.3958,
+ "step": 193250
+ },
+ {
+ "epoch": 0.9973119527811743,
+ "grad_norm": 23383.431640625,
+ "learning_rate": 5.269013154798558e-06,
+ "loss": 0.3998,
+ "step": 193300
+ },
+ {
+ "epoch": 0.9975699227637872,
+ "grad_norm": 20586.341796875,
+ "learning_rate": 5.252702391111508e-06,
+ "loss": 0.3979,
+ "step": 193350
+ },
+ {
+ "epoch": 0.9978278927464,
+ "grad_norm": 26526.83203125,
+ "learning_rate": 5.236415512665438e-06,
+ "loss": 0.4036,
+ "step": 193400
+ },
+ {
+ "epoch": 0.9980858627290129,
+ "grad_norm": 25045.224609375,
+ "learning_rate": 5.220152528153965e-06,
+ "loss": 0.4028,
+ "step": 193450
+ },
+ {
+ "epoch": 0.9983438327116256,
+ "grad_norm": 23480.755859375,
+ "learning_rate": 5.20391344625798e-06,
+ "loss": 0.4053,
+ "step": 193500
+ },
+ {
+ "epoch": 0.9986018026942385,
+ "grad_norm": 25235.927734375,
+ "learning_rate": 5.187698275645553e-06,
+ "loss": 0.3964,
+ "step": 193550
+ },
+ {
+ "epoch": 0.9988597726768513,
+ "grad_norm": 24883.29296875,
+ "learning_rate": 5.1715070249720555e-06,
+ "loss": 0.3978,
+ "step": 193600
+ },
+ {
+ "epoch": 0.9991177426594642,
+ "grad_norm": 25161.71484375,
+ "learning_rate": 5.155339702880052e-06,
+ "loss": 0.3998,
+ "step": 193650
+ },
+ {
+ "epoch": 0.9993757126420769,
+ "grad_norm": 21524.724609375,
+ "learning_rate": 5.13919631799934e-06,
+ "loss": 0.3955,
+ "step": 193700
+ },
+ {
+ "epoch": 0.9996336826246898,
+ "grad_norm": 23394.1015625,
+ "learning_rate": 5.123076878946981e-06,
+ "loss": 0.3962,
+ "step": 193750
+ },
+ {
+ "epoch": 0.9998916526073026,
+ "grad_norm": 24562.419921875,
+ "learning_rate": 5.106981394327165e-06,
+ "loss": 0.4,
+ "step": 193800
+ },
+ {
+ "epoch": 1.0001496225899154,
+ "grad_norm": 23818.201171875,
+ "learning_rate": 5.090909872731392e-06,
+ "loss": 0.4065,
+ "step": 193850
+ },
+ {
+ "epoch": 1.0004075925725282,
+ "grad_norm": 25973.83984375,
+ "learning_rate": 5.074862322738316e-06,
+ "loss": 0.4015,
+ "step": 193900
+ },
+ {
+ "epoch": 1.000665562555141,
+ "grad_norm": 26476.041015625,
+ "learning_rate": 5.0588387529138085e-06,
+ "loss": 0.401,
+ "step": 193950
+ },
+ {
+ "epoch": 1.000923532537754,
+ "grad_norm": 22776.267578125,
+ "learning_rate": 5.042839171810937e-06,
+ "loss": 0.4021,
+ "step": 194000
+ },
+ {
+ "epoch": 1.0011815025203668,
+ "grad_norm": 22484.884765625,
+ "learning_rate": 5.026863587969966e-06,
+ "loss": 0.4013,
+ "step": 194050
+ },
+ {
+ "epoch": 1.0014394725029796,
+ "grad_norm": 21445.009765625,
+ "learning_rate": 5.010912009918361e-06,
+ "loss": 0.4001,
+ "step": 194100
+ },
+ {
+ "epoch": 1.0016974424855924,
+ "grad_norm": 23748.365234375,
+ "learning_rate": 4.994984446170764e-06,
+ "loss": 0.3985,
+ "step": 194150
+ },
+ {
+ "epoch": 1.0019554124682053,
+ "grad_norm": 25007.73828125,
+ "learning_rate": 4.9790809052289996e-06,
+ "loss": 0.403,
+ "step": 194200
+ },
+ {
+ "epoch": 1.002213382450818,
+ "grad_norm": 26824.900390625,
+ "learning_rate": 4.963201395582062e-06,
+ "loss": 0.3966,
+ "step": 194250
+ },
+ {
+ "epoch": 1.0024713524334308,
+ "grad_norm": 21838.662109375,
+ "learning_rate": 4.947345925706148e-06,
+ "loss": 0.3955,
+ "step": 194300
+ },
+ {
+ "epoch": 1.0027293224160436,
+ "grad_norm": 20830.59375,
+ "learning_rate": 4.931514504064566e-06,
+ "loss": 0.3976,
+ "step": 194350
+ },
+ {
+ "epoch": 1.0029872923986565,
+ "grad_norm": 24187.484375,
+ "learning_rate": 4.915707139107856e-06,
+ "loss": 0.4009,
+ "step": 194400
+ },
+ {
+ "epoch": 1.0032452623812693,
+ "grad_norm": 23026.99609375,
+ "learning_rate": 4.899923839273662e-06,
+ "loss": 0.4017,
+ "step": 194450
+ },
+ {
+ "epoch": 1.0035032323638822,
+ "grad_norm": 25855.919921875,
+ "learning_rate": 4.884164612986808e-06,
+ "loss": 0.3966,
+ "step": 194500
+ },
+ {
+ "epoch": 1.003761202346495,
+ "grad_norm": 23424.58984375,
+ "learning_rate": 4.86842946865928e-06,
+ "loss": 0.4007,
+ "step": 194550
+ },
+ {
+ "epoch": 1.0040191723291079,
+ "grad_norm": 20644.318359375,
+ "learning_rate": 4.852718414690166e-06,
+ "loss": 0.405,
+ "step": 194600
+ },
+ {
+ "epoch": 1.0042771423117207,
+ "grad_norm": 24923.30078125,
+ "learning_rate": 4.8370314594657405e-06,
+ "loss": 0.3961,
+ "step": 194650
+ },
+ {
+ "epoch": 1.0045351122943333,
+ "grad_norm": 23334.19921875,
+ "learning_rate": 4.821368611359395e-06,
+ "loss": 0.3981,
+ "step": 194700
+ },
+ {
+ "epoch": 1.0047930822769462,
+ "grad_norm": 24258.54296875,
+ "learning_rate": 4.8057298787316516e-06,
+ "loss": 0.3998,
+ "step": 194750
+ },
+ {
+ "epoch": 1.005051052259559,
+ "grad_norm": 23366.234375,
+ "learning_rate": 4.790115269930162e-06,
+ "loss": 0.3998,
+ "step": 194800
+ },
+ {
+ "epoch": 1.005309022242172,
+ "grad_norm": 22389.498046875,
+ "learning_rate": 4.774524793289692e-06,
+ "loss": 0.4025,
+ "step": 194850
+ },
+ {
+ "epoch": 1.0055669922247847,
+ "grad_norm": 25497.361328125,
+ "learning_rate": 4.758958457132157e-06,
+ "loss": 0.3979,
+ "step": 194900
+ },
+ {
+ "epoch": 1.0058249622073976,
+ "grad_norm": 24179.626953125,
+ "learning_rate": 4.7434162697665595e-06,
+ "loss": 0.3984,
+ "step": 194950
+ },
+ {
+ "epoch": 1.0060829321900104,
+ "grad_norm": 24002.955078125,
+ "learning_rate": 4.727898239489015e-06,
+ "loss": 0.398,
+ "step": 195000
+ },
+ {
+ "epoch": 1.0060829321900104,
+ "eval_loss": 0.3868441879749298,
+ "eval_runtime": 3205.6792,
+ "eval_samples_per_second": 967.383,
+ "eval_steps_per_second": 1.889,
+ "step": 195000
+ },
+ {
+ "epoch": 1.0063409021726233,
+ "grad_norm": 26567.27734375,
+ "learning_rate": 4.712404374582741e-06,
+ "loss": 0.399,
+ "step": 195050
+ },
+ {
+ "epoch": 1.006598872155236,
+ "grad_norm": 25244.615234375,
+ "learning_rate": 4.696934683318077e-06,
+ "loss": 0.3998,
+ "step": 195100
+ },
+ {
+ "epoch": 1.0068568421378488,
+ "grad_norm": 23278.265625,
+ "learning_rate": 4.6814891739524195e-06,
+ "loss": 0.4002,
+ "step": 195150
+ },
+ {
+ "epoch": 1.0071148121204616,
+ "grad_norm": 23141.138671875,
+ "learning_rate": 4.666067854730322e-06,
+ "loss": 0.3965,
+ "step": 195200
+ },
+ {
+ "epoch": 1.0073727821030745,
+ "grad_norm": 23506.640625,
+ "learning_rate": 4.650670733883344e-06,
+ "loss": 0.3962,
+ "step": 195250
+ },
+ {
+ "epoch": 1.0076307520856873,
+ "grad_norm": 26591.212890625,
+ "learning_rate": 4.635297819630202e-06,
+ "loss": 0.3992,
+ "step": 195300
+ },
+ {
+ "epoch": 1.0078887220683002,
+ "grad_norm": 22111.640625,
+ "learning_rate": 4.619949120176642e-06,
+ "loss": 0.401,
+ "step": 195350
+ },
+ {
+ "epoch": 1.008146692050913,
+ "grad_norm": 25048.17578125,
+ "learning_rate": 4.604624643715505e-06,
+ "loss": 0.4016,
+ "step": 195400
+ },
+ {
+ "epoch": 1.0084046620335259,
+ "grad_norm": 23263.23828125,
+ "learning_rate": 4.589324398426714e-06,
+ "loss": 0.3942,
+ "step": 195450
+ },
+ {
+ "epoch": 1.0086626320161387,
+ "grad_norm": 23640.9296875,
+ "learning_rate": 4.57404839247722e-06,
+ "loss": 0.4039,
+ "step": 195500
+ },
+ {
+ "epoch": 1.0089206019987513,
+ "grad_norm": 25680.390625,
+ "learning_rate": 4.558796634021079e-06,
+ "loss": 0.3986,
+ "step": 195550
+ },
+ {
+ "epoch": 1.0091785719813642,
+ "grad_norm": 23321.78125,
+ "learning_rate": 4.543569131199382e-06,
+ "loss": 0.4039,
+ "step": 195600
+ },
+ {
+ "epoch": 1.009436541963977,
+ "grad_norm": 24123.205078125,
+ "learning_rate": 4.528365892140263e-06,
+ "loss": 0.397,
+ "step": 195650
+ },
+ {
+ "epoch": 1.0096945119465899,
+ "grad_norm": 23332.673828125,
+ "learning_rate": 4.513186924958928e-06,
+ "loss": 0.3941,
+ "step": 195700
+ },
+ {
+ "epoch": 1.0099524819292027,
+ "grad_norm": 25583.609375,
+ "learning_rate": 4.498032237757605e-06,
+ "loss": 0.4046,
+ "step": 195750
+ },
+ {
+ "epoch": 1.0102104519118156,
+ "grad_norm": 25230.3515625,
+ "learning_rate": 4.482901838625586e-06,
+ "loss": 0.4012,
+ "step": 195800
+ },
+ {
+ "epoch": 1.0104684218944284,
+ "grad_norm": 24376.5859375,
+ "learning_rate": 4.46779573563918e-06,
+ "loss": 0.3911,
+ "step": 195850
+ },
+ {
+ "epoch": 1.0107263918770413,
+ "grad_norm": 23978.17578125,
+ "learning_rate": 4.452713936861724e-06,
+ "loss": 0.4031,
+ "step": 195900
+ },
+ {
+ "epoch": 1.010984361859654,
+ "grad_norm": 23535.03515625,
+ "learning_rate": 4.437656450343602e-06,
+ "loss": 0.3933,
+ "step": 195950
+ },
+ {
+ "epoch": 1.0112423318422668,
+ "grad_norm": 24465.794921875,
+ "learning_rate": 4.422623284122207e-06,
+ "loss": 0.4027,
+ "step": 196000
+ },
+ {
+ "epoch": 1.0115003018248796,
+ "grad_norm": 23942.03125,
+ "learning_rate": 4.407614446221936e-06,
+ "loss": 0.4024,
+ "step": 196050
+ },
+ {
+ "epoch": 1.0117582718074924,
+ "grad_norm": 23610.720703125,
+ "learning_rate": 4.392629944654248e-06,
+ "loss": 0.3982,
+ "step": 196100
+ },
+ {
+ "epoch": 1.0120162417901053,
+ "grad_norm": 25937.53125,
+ "learning_rate": 4.3776697874175375e-06,
+ "loss": 0.3991,
+ "step": 196150
+ },
+ {
+ "epoch": 1.0122742117727181,
+ "grad_norm": 24008.5234375,
+ "learning_rate": 4.362733982497286e-06,
+ "loss": 0.3968,
+ "step": 196200
+ },
+ {
+ "epoch": 1.012532181755331,
+ "grad_norm": 23377.744140625,
+ "learning_rate": 4.347822537865914e-06,
+ "loss": 0.3958,
+ "step": 196250
+ },
+ {
+ "epoch": 1.0127901517379438,
+ "grad_norm": 23768.7421875,
+ "learning_rate": 4.332935461482862e-06,
+ "loss": 0.4004,
+ "step": 196300
+ },
+ {
+ "epoch": 1.0130481217205567,
+ "grad_norm": 25974.603515625,
+ "learning_rate": 4.3180727612945896e-06,
+ "loss": 0.4038,
+ "step": 196350
+ },
+ {
+ "epoch": 1.0133060917031693,
+ "grad_norm": 22376.34765625,
+ "learning_rate": 4.303234445234477e-06,
+ "loss": 0.3991,
+ "step": 196400
+ },
+ {
+ "epoch": 1.0135640616857822,
+ "grad_norm": 22145.03515625,
+ "learning_rate": 4.288420521222963e-06,
+ "loss": 0.3971,
+ "step": 196450
+ },
+ {
+ "epoch": 1.013822031668395,
+ "grad_norm": 21512.77734375,
+ "learning_rate": 4.273630997167422e-06,
+ "loss": 0.399,
+ "step": 196500
+ },
+ {
+ "epoch": 1.0140800016510079,
+ "grad_norm": 22957.626953125,
+ "learning_rate": 4.258865880962215e-06,
+ "loss": 0.3995,
+ "step": 196550
+ },
+ {
+ "epoch": 1.0143379716336207,
+ "grad_norm": 21951.89453125,
+ "learning_rate": 4.244125180488673e-06,
+ "loss": 0.3961,
+ "step": 196600
+ },
+ {
+ "epoch": 1.0145959416162336,
+ "grad_norm": 23440.005859375,
+ "learning_rate": 4.229408903615095e-06,
+ "loss": 0.4057,
+ "step": 196650
+ },
+ {
+ "epoch": 1.0148539115988464,
+ "grad_norm": 23987.21484375,
+ "learning_rate": 4.214717058196754e-06,
+ "loss": 0.3999,
+ "step": 196700
+ },
+ {
+ "epoch": 1.0151118815814593,
+ "grad_norm": 24526.482421875,
+ "learning_rate": 4.200049652075866e-06,
+ "loss": 0.3964,
+ "step": 196750
+ },
+ {
+ "epoch": 1.0153698515640721,
+ "grad_norm": 23351.193359375,
+ "learning_rate": 4.185406693081612e-06,
+ "loss": 0.3978,
+ "step": 196800
+ },
+ {
+ "epoch": 1.0156278215466847,
+ "grad_norm": 25014.873046875,
+ "learning_rate": 4.170788189030106e-06,
+ "loss": 0.3963,
+ "step": 196850
+ },
+ {
+ "epoch": 1.0158857915292976,
+ "grad_norm": 21085.181640625,
+ "learning_rate": 4.156194147724451e-06,
+ "loss": 0.4015,
+ "step": 196900
+ },
+ {
+ "epoch": 1.0161437615119104,
+ "grad_norm": 20203.427734375,
+ "learning_rate": 4.141624576954634e-06,
+ "loss": 0.4037,
+ "step": 196950
+ },
+ {
+ "epoch": 1.0164017314945233,
+ "grad_norm": 23869.416015625,
+ "learning_rate": 4.1270794844976255e-06,
+ "loss": 0.4038,
+ "step": 197000
+ },
+ {
+ "epoch": 1.0166597014771361,
+ "grad_norm": 24936.158203125,
+ "learning_rate": 4.112558878117318e-06,
+ "loss": 0.4073,
+ "step": 197050
+ },
+ {
+ "epoch": 1.016917671459749,
+ "grad_norm": 23021.921875,
+ "learning_rate": 4.098062765564509e-06,
+ "loss": 0.4056,
+ "step": 197100
+ },
+ {
+ "epoch": 1.0171756414423618,
+ "grad_norm": 21626.19921875,
+ "learning_rate": 4.083591154576971e-06,
+ "loss": 0.3989,
+ "step": 197150
+ },
+ {
+ "epoch": 1.0174336114249747,
+ "grad_norm": 25556.169921875,
+ "learning_rate": 4.069144052879342e-06,
+ "loss": 0.3975,
+ "step": 197200
+ },
+ {
+ "epoch": 1.0176915814075873,
+ "grad_norm": 23286.365234375,
+ "learning_rate": 4.054721468183226e-06,
+ "loss": 0.3974,
+ "step": 197250
+ },
+ {
+ "epoch": 1.0179495513902002,
+ "grad_norm": 24497.57421875,
+ "learning_rate": 4.040323408187113e-06,
+ "loss": 0.4028,
+ "step": 197300
+ },
+ {
+ "epoch": 1.018207521372813,
+ "grad_norm": 26279.40625,
+ "learning_rate": 4.025949880576407e-06,
+ "loss": 0.4034,
+ "step": 197350
+ },
+ {
+ "epoch": 1.0184654913554259,
+ "grad_norm": 22679.267578125,
+ "learning_rate": 4.011600893023421e-06,
+ "loss": 0.3991,
+ "step": 197400
+ },
+ {
+ "epoch": 1.0187234613380387,
+ "grad_norm": 25421.83984375,
+ "learning_rate": 3.997276453187365e-06,
+ "loss": 0.4023,
+ "step": 197450
+ },
+ {
+ "epoch": 1.0189814313206516,
+ "grad_norm": 25313.75,
+ "learning_rate": 3.982976568714336e-06,
+ "loss": 0.4018,
+ "step": 197500
+ },
+ {
+ "epoch": 1.0192394013032644,
+ "grad_norm": 24318.505859375,
+ "learning_rate": 3.96870124723736e-06,
+ "loss": 0.4027,
+ "step": 197550
+ },
+ {
+ "epoch": 1.0194973712858773,
+ "grad_norm": 22409.70703125,
+ "learning_rate": 3.9544504963763105e-06,
+ "loss": 0.3982,
+ "step": 197600
+ },
+ {
+ "epoch": 1.01975534126849,
+ "grad_norm": 25028.7265625,
+ "learning_rate": 3.9402243237379675e-06,
+ "loss": 0.4037,
+ "step": 197650
+ },
+ {
+ "epoch": 1.0200133112511027,
+ "grad_norm": 21235.19140625,
+ "learning_rate": 3.926022736915985e-06,
+ "loss": 0.3972,
+ "step": 197700
+ },
+ {
+ "epoch": 1.0202712812337156,
+ "grad_norm": 24214.41015625,
+ "learning_rate": 3.911845743490889e-06,
+ "loss": 0.3984,
+ "step": 197750
+ },
+ {
+ "epoch": 1.0205292512163284,
+ "grad_norm": 24445.375,
+ "learning_rate": 3.897693351030102e-06,
+ "loss": 0.4025,
+ "step": 197800
+ },
+ {
+ "epoch": 1.0207872211989413,
+ "grad_norm": 25233.3515625,
+ "learning_rate": 3.883565567087871e-06,
+ "loss": 0.3993,
+ "step": 197850
+ },
+ {
+ "epoch": 1.0210451911815541,
+ "grad_norm": 23982.43359375,
+ "learning_rate": 3.8694623992053534e-06,
+ "loss": 0.4023,
+ "step": 197900
+ },
+ {
+ "epoch": 1.021303161164167,
+ "grad_norm": 28533.689453125,
+ "learning_rate": 3.855383854910549e-06,
+ "loss": 0.3917,
+ "step": 197950
+ },
+ {
+ "epoch": 1.0215611311467798,
+ "grad_norm": 26334.77734375,
+ "learning_rate": 3.841329941718286e-06,
+ "loss": 0.3989,
+ "step": 198000
+ },
+ {
+ "epoch": 1.0218191011293927,
+ "grad_norm": 24765.802734375,
+ "learning_rate": 3.827300667130312e-06,
+ "loss": 0.398,
+ "step": 198050
+ },
+ {
+ "epoch": 1.0220770711120055,
+ "grad_norm": 25089.34765625,
+ "learning_rate": 3.8132960386351445e-06,
+ "loss": 0.4049,
+ "step": 198100
+ },
+ {
+ "epoch": 1.0223350410946181,
+ "grad_norm": 23840.72265625,
+ "learning_rate": 3.7993160637082027e-06,
+ "loss": 0.3998,
+ "step": 198150
+ },
+ {
+ "epoch": 1.022593011077231,
+ "grad_norm": 21590.1328125,
+ "learning_rate": 3.7853607498117282e-06,
+ "loss": 0.404,
+ "step": 198200
+ },
+ {
+ "epoch": 1.0228509810598438,
+ "grad_norm": 24620.478515625,
+ "learning_rate": 3.7714301043947855e-06,
+ "loss": 0.3958,
+ "step": 198250
+ },
+ {
+ "epoch": 1.0231089510424567,
+ "grad_norm": 22476.82421875,
+ "learning_rate": 3.757524134893292e-06,
+ "loss": 0.3993,
+ "step": 198300
+ },
+ {
+ "epoch": 1.0233669210250695,
+ "grad_norm": 22550.45703125,
+ "learning_rate": 3.7436428487299836e-06,
+ "loss": 0.3983,
+ "step": 198350
+ },
+ {
+ "epoch": 1.0236248910076824,
+ "grad_norm": 23764.958984375,
+ "learning_rate": 3.7297862533144045e-06,
+ "loss": 0.4005,
+ "step": 198400
+ },
+ {
+ "epoch": 1.0238828609902952,
+ "grad_norm": 23600.103515625,
+ "learning_rate": 3.7159543560429667e-06,
+ "loss": 0.3976,
+ "step": 198450
+ },
+ {
+ "epoch": 1.024140830972908,
+ "grad_norm": 24258.537109375,
+ "learning_rate": 3.7021471642988583e-06,
+ "loss": 0.4015,
+ "step": 198500
+ },
+ {
+ "epoch": 1.0243988009555207,
+ "grad_norm": 22559.609375,
+ "learning_rate": 3.6883646854520837e-06,
+ "loss": 0.4028,
+ "step": 198550
+ },
+ {
+ "epoch": 1.0246567709381336,
+ "grad_norm": 20827.234375,
+ "learning_rate": 3.67460692685947e-06,
+ "loss": 0.3954,
+ "step": 198600
+ },
+ {
+ "epoch": 1.0249147409207464,
+ "grad_norm": 24864.171875,
+ "learning_rate": 3.6608738958646303e-06,
+ "loss": 0.3919,
+ "step": 198650
+ },
+ {
+ "epoch": 1.0251727109033593,
+ "grad_norm": 25603.6796875,
+ "learning_rate": 3.647165599798019e-06,
+ "loss": 0.3984,
+ "step": 198700
+ },
+ {
+ "epoch": 1.0254306808859721,
+ "grad_norm": 21448.0234375,
+ "learning_rate": 3.6334820459768217e-06,
+ "loss": 0.4031,
+ "step": 198750
+ },
+ {
+ "epoch": 1.025688650868585,
+ "grad_norm": 24923.51953125,
+ "learning_rate": 3.6198232417050782e-06,
+ "loss": 0.4023,
+ "step": 198800
+ },
+ {
+ "epoch": 1.0259466208511978,
+ "grad_norm": 21672.09765625,
+ "learning_rate": 3.6061891942735957e-06,
+ "loss": 0.4027,
+ "step": 198850
+ },
+ {
+ "epoch": 1.0262045908338107,
+ "grad_norm": 24733.31640625,
+ "learning_rate": 3.5925799109599423e-06,
+ "loss": 0.401,
+ "step": 198900
+ },
+ {
+ "epoch": 1.0264625608164235,
+ "grad_norm": 25941.05859375,
+ "learning_rate": 3.5789953990285284e-06,
+ "loss": 0.3944,
+ "step": 198950
+ },
+ {
+ "epoch": 1.0267205307990361,
+ "grad_norm": 25462.96875,
+ "learning_rate": 3.56543566573046e-06,
+ "loss": 0.4021,
+ "step": 199000
+ },
+ {
+ "epoch": 1.026978500781649,
+ "grad_norm": 24243.462890625,
+ "learning_rate": 3.5519007183036856e-06,
+ "loss": 0.4009,
+ "step": 199050
+ },
+ {
+ "epoch": 1.0272364707642618,
+ "grad_norm": 22507.208984375,
+ "learning_rate": 3.5383905639728987e-06,
+ "loss": 0.3968,
+ "step": 199100
+ },
+ {
+ "epoch": 1.0274944407468747,
+ "grad_norm": 22496.060546875,
+ "learning_rate": 3.524905209949553e-06,
+ "loss": 0.3988,
+ "step": 199150
+ },
+ {
+ "epoch": 1.0277524107294875,
+ "grad_norm": 22755.974609375,
+ "learning_rate": 3.511444663431862e-06,
+ "loss": 0.3944,
+ "step": 199200
+ },
+ {
+ "epoch": 1.0280103807121004,
+ "grad_norm": 24945.93359375,
+ "learning_rate": 3.498008931604818e-06,
+ "loss": 0.4015,
+ "step": 199250
+ },
+ {
+ "epoch": 1.0282683506947132,
+ "grad_norm": 23216.15625,
+ "learning_rate": 3.484598021640134e-06,
+ "loss": 0.3982,
+ "step": 199300
+ },
+ {
+ "epoch": 1.028526320677326,
+ "grad_norm": 24690.8203125,
+ "learning_rate": 3.4712119406963174e-06,
+ "loss": 0.4,
+ "step": 199350
+ },
+ {
+ "epoch": 1.0287842906599387,
+ "grad_norm": 23324.27734375,
+ "learning_rate": 3.4578506959185907e-06,
+ "loss": 0.4005,
+ "step": 199400
+ },
+ {
+ "epoch": 1.0290422606425516,
+ "grad_norm": 22831.544921875,
+ "learning_rate": 3.444514294438922e-06,
+ "loss": 0.3987,
+ "step": 199450
+ },
+ {
+ "epoch": 1.0293002306251644,
+ "grad_norm": 22126.681640625,
+ "learning_rate": 3.4312027433760383e-06,
+ "loss": 0.4044,
+ "step": 199500
+ },
+ {
+ "epoch": 1.0295582006077773,
+ "grad_norm": 22105.94140625,
+ "learning_rate": 3.417916049835368e-06,
+ "loss": 0.4023,
+ "step": 199550
+ },
+ {
+ "epoch": 1.02981617059039,
+ "grad_norm": 24164.646484375,
+ "learning_rate": 3.4046542209091037e-06,
+ "loss": 0.3968,
+ "step": 199600
+ },
+ {
+ "epoch": 1.030074140573003,
+ "grad_norm": 23752.33203125,
+ "learning_rate": 3.3914172636761554e-06,
+ "loss": 0.3974,
+ "step": 199650
+ },
+ {
+ "epoch": 1.0303321105556158,
+ "grad_norm": 21793.787109375,
+ "learning_rate": 3.3782051852021433e-06,
+ "loss": 0.3981,
+ "step": 199700
+ },
+ {
+ "epoch": 1.0305900805382286,
+ "grad_norm": 26727.91796875,
+ "learning_rate": 3.365017992539432e-06,
+ "loss": 0.4025,
+ "step": 199750
+ },
+ {
+ "epoch": 1.0308480505208415,
+ "grad_norm": 21089.958984375,
+ "learning_rate": 3.3518556927270683e-06,
+ "loss": 0.4001,
+ "step": 199800
+ },
+ {
+ "epoch": 1.0311060205034541,
+ "grad_norm": 23690.0390625,
+ "learning_rate": 3.33871829279086e-06,
+ "loss": 0.3956,
+ "step": 199850
+ },
+ {
+ "epoch": 1.031363990486067,
+ "grad_norm": 24266.84375,
+ "learning_rate": 3.325605799743281e-06,
+ "loss": 0.3966,
+ "step": 199900
+ },
+ {
+ "epoch": 1.0316219604686798,
+ "grad_norm": 22199.455078125,
+ "learning_rate": 3.312518220583527e-06,
+ "loss": 0.4058,
+ "step": 199950
+ },
+ {
+ "epoch": 1.0318799304512927,
+ "grad_norm": 21272.033203125,
+ "learning_rate": 3.299455562297504e-06,
+ "loss": 0.3969,
+ "step": 200000
+ },
+ {
+ "epoch": 1.0318799304512927,
+ "eval_loss": 0.38684460520744324,
+ "eval_runtime": 3230.0057,
+ "eval_samples_per_second": 960.097,
+ "eval_steps_per_second": 1.875,
+ "step": 200000
+ },
+ {
+ "epoch": 1.0321379004339055,
+ "grad_norm": 23089.7578125,
+ "learning_rate": 3.286417831857791e-06,
+ "loss": 0.4011,
+ "step": 200050
+ },
+ {
+ "epoch": 1.0323958704165184,
+ "grad_norm": 27875.5859375,
+ "learning_rate": 3.2734050362236814e-06,
+ "loss": 0.4014,
+ "step": 200100
+ },
+ {
+ "epoch": 1.0326538403991312,
+ "grad_norm": 22023.40234375,
+ "learning_rate": 3.260417182341169e-06,
+ "loss": 0.398,
+ "step": 200150
+ },
+ {
+ "epoch": 1.032911810381744,
+ "grad_norm": 23899.208984375,
+ "learning_rate": 3.247454277142892e-06,
+ "loss": 0.3976,
+ "step": 200200
+ },
+ {
+ "epoch": 1.0331697803643567,
+ "grad_norm": 22874.44921875,
+ "learning_rate": 3.2345163275482147e-06,
+ "loss": 0.4014,
+ "step": 200250
+ },
+ {
+ "epoch": 1.0334277503469695,
+ "grad_norm": 21650.296875,
+ "learning_rate": 3.221603340463164e-06,
+ "loss": 0.4012,
+ "step": 200300
+ },
+ {
+ "epoch": 1.0336857203295824,
+ "grad_norm": 24189.89453125,
+ "learning_rate": 3.2087153227804314e-06,
+ "loss": 0.401,
+ "step": 200350
+ },
+ {
+ "epoch": 1.0339436903121952,
+ "grad_norm": 21525.12109375,
+ "learning_rate": 3.1958522813794134e-06,
+ "loss": 0.4016,
+ "step": 200400
+ },
+ {
+ "epoch": 1.034201660294808,
+ "grad_norm": 23732.640625,
+ "learning_rate": 3.1830142231261294e-06,
+ "loss": 0.4021,
+ "step": 200450
+ },
+ {
+ "epoch": 1.034459630277421,
+ "grad_norm": 24911.607421875,
+ "learning_rate": 3.170201154873298e-06,
+ "loss": 0.3943,
+ "step": 200500
+ },
+ {
+ "epoch": 1.0347176002600338,
+ "grad_norm": 25295.861328125,
+ "learning_rate": 3.1574130834602813e-06,
+ "loss": 0.401,
+ "step": 200550
+ },
+ {
+ "epoch": 1.0349755702426466,
+ "grad_norm": 23536.498046875,
+ "learning_rate": 3.1446500157131075e-06,
+ "loss": 0.3964,
+ "step": 200600
+ },
+ {
+ "epoch": 1.0352335402252595,
+ "grad_norm": 26484.287109375,
+ "learning_rate": 3.131911958444461e-06,
+ "loss": 0.4068,
+ "step": 200650
+ },
+ {
+ "epoch": 1.0354915102078721,
+ "grad_norm": 24330.001953125,
+ "learning_rate": 3.1191989184536474e-06,
+ "loss": 0.3911,
+ "step": 200700
+ },
+ {
+ "epoch": 1.035749480190485,
+ "grad_norm": 21095.994140625,
+ "learning_rate": 3.1065109025266713e-06,
+ "loss": 0.4,
+ "step": 200750
+ },
+ {
+ "epoch": 1.0360074501730978,
+ "grad_norm": 21829.64453125,
+ "learning_rate": 3.093847917436132e-06,
+ "loss": 0.4016,
+ "step": 200800
+ },
+ {
+ "epoch": 1.0362654201557107,
+ "grad_norm": 25772.79296875,
+ "learning_rate": 3.0812099699412953e-06,
+ "loss": 0.4032,
+ "step": 200850
+ },
+ {
+ "epoch": 1.0365233901383235,
+ "grad_norm": 25614.240234375,
+ "learning_rate": 3.0685970667880425e-06,
+ "loss": 0.3976,
+ "step": 200900
+ },
+ {
+ "epoch": 1.0367813601209364,
+ "grad_norm": 26170.455078125,
+ "learning_rate": 3.056009214708905e-06,
+ "loss": 0.4001,
+ "step": 200950
+ },
+ {
+ "epoch": 1.0370393301035492,
+ "grad_norm": 24801.76171875,
+ "learning_rate": 3.0434464204230186e-06,
+ "loss": 0.3924,
+ "step": 201000
+ },
+ {
+ "epoch": 1.037297300086162,
+ "grad_norm": 28940.640625,
+ "learning_rate": 3.0309086906361917e-06,
+ "loss": 0.3998,
+ "step": 201050
+ },
+ {
+ "epoch": 1.037555270068775,
+ "grad_norm": 23856.90625,
+ "learning_rate": 3.018396032040788e-06,
+ "loss": 0.397,
+ "step": 201100
+ },
+ {
+ "epoch": 1.0378132400513875,
+ "grad_norm": 23309.861328125,
+ "learning_rate": 3.005908451315842e-06,
+ "loss": 0.4026,
+ "step": 201150
+ },
+ {
+ "epoch": 1.0380712100340004,
+ "grad_norm": 23592.7265625,
+ "learning_rate": 2.993445955126978e-06,
+ "loss": 0.3971,
+ "step": 201200
+ },
+ {
+ "epoch": 1.0383291800166132,
+ "grad_norm": 23301.861328125,
+ "learning_rate": 2.9810085501264296e-06,
+ "loss": 0.403,
+ "step": 201250
+ },
+ {
+ "epoch": 1.038587149999226,
+ "grad_norm": 23200.0859375,
+ "learning_rate": 2.968596242953059e-06,
+ "loss": 0.4001,
+ "step": 201300
+ },
+ {
+ "epoch": 1.038845119981839,
+ "grad_norm": 26894.70703125,
+ "learning_rate": 2.956209040232294e-06,
+ "loss": 0.3988,
+ "step": 201350
+ },
+ {
+ "epoch": 1.0391030899644518,
+ "grad_norm": 22423.931640625,
+ "learning_rate": 2.9438469485761956e-06,
+ "loss": 0.3981,
+ "step": 201400
+ },
+ {
+ "epoch": 1.0393610599470646,
+ "grad_norm": 24167.068359375,
+ "learning_rate": 2.9315099745834073e-06,
+ "loss": 0.4024,
+ "step": 201450
+ },
+ {
+ "epoch": 1.0396190299296775,
+ "grad_norm": 25832.712890625,
+ "learning_rate": 2.9191981248391677e-06,
+ "loss": 0.3937,
+ "step": 201500
+ },
+ {
+ "epoch": 1.03987699991229,
+ "grad_norm": 26923.005859375,
+ "learning_rate": 2.9069114059153024e-06,
+ "loss": 0.3922,
+ "step": 201550
+ },
+ {
+ "epoch": 1.040134969894903,
+ "grad_norm": 23295.380859375,
+ "learning_rate": 2.8946498243702158e-06,
+ "loss": 0.4011,
+ "step": 201600
+ },
+ {
+ "epoch": 1.0403929398775158,
+ "grad_norm": 23378.5234375,
+ "learning_rate": 2.882413386748922e-06,
+ "loss": 0.4033,
+ "step": 201650
+ },
+ {
+ "epoch": 1.0406509098601286,
+ "grad_norm": 24349.9140625,
+ "learning_rate": 2.8702020995829803e-06,
+ "loss": 0.3964,
+ "step": 201700
+ },
+ {
+ "epoch": 1.0409088798427415,
+ "grad_norm": 24178.61328125,
+ "learning_rate": 2.8580159693905485e-06,
+ "loss": 0.3978,
+ "step": 201750
+ },
+ {
+ "epoch": 1.0411668498253543,
+ "grad_norm": 24998.189453125,
+ "learning_rate": 2.8458550026763344e-06,
+ "loss": 0.3943,
+ "step": 201800
+ },
+ {
+ "epoch": 1.0414248198079672,
+ "grad_norm": 28928.828125,
+ "learning_rate": 2.8337192059316344e-06,
+ "loss": 0.3998,
+ "step": 201850
+ },
+ {
+ "epoch": 1.04168278979058,
+ "grad_norm": 24329.37890625,
+ "learning_rate": 2.8216085856342946e-06,
+ "loss": 0.3976,
+ "step": 201900
+ },
+ {
+ "epoch": 1.041940759773193,
+ "grad_norm": 24121.482421875,
+ "learning_rate": 2.809523148248744e-06,
+ "loss": 0.3952,
+ "step": 201950
+ },
+ {
+ "epoch": 1.0421987297558055,
+ "grad_norm": 23812.671875,
+ "learning_rate": 2.7974629002259443e-06,
+ "loss": 0.4052,
+ "step": 202000
+ },
+ {
+ "epoch": 1.0424566997384184,
+ "grad_norm": 25162.40234375,
+ "learning_rate": 2.785427848003419e-06,
+ "loss": 0.3948,
+ "step": 202050
+ },
+ {
+ "epoch": 1.0427146697210312,
+ "grad_norm": 23631.462890625,
+ "learning_rate": 2.773417998005262e-06,
+ "loss": 0.3982,
+ "step": 202100
+ },
+ {
+ "epoch": 1.042972639703644,
+ "grad_norm": 24178.177734375,
+ "learning_rate": 2.761433356642079e-06,
+ "loss": 0.4012,
+ "step": 202150
+ },
+ {
+ "epoch": 1.043230609686257,
+ "grad_norm": 24726.37890625,
+ "learning_rate": 2.7494739303110527e-06,
+ "loss": 0.3926,
+ "step": 202200
+ },
+ {
+ "epoch": 1.0434885796688698,
+ "grad_norm": 23798.73828125,
+ "learning_rate": 2.7375397253958935e-06,
+ "loss": 0.3998,
+ "step": 202250
+ },
+ {
+ "epoch": 1.0437465496514826,
+ "grad_norm": 25162.677734375,
+ "learning_rate": 2.725630748266844e-06,
+ "loss": 0.4038,
+ "step": 202300
+ },
+ {
+ "epoch": 1.0440045196340955,
+ "grad_norm": 28668.78515625,
+ "learning_rate": 2.7137470052806814e-06,
+ "loss": 0.3989,
+ "step": 202350
+ },
+ {
+ "epoch": 1.0442624896167083,
+ "grad_norm": 22550.810546875,
+ "learning_rate": 2.7018885027807195e-06,
+ "loss": 0.3994,
+ "step": 202400
+ },
+ {
+ "epoch": 1.044520459599321,
+ "grad_norm": 26758.71484375,
+ "learning_rate": 2.6900552470968064e-06,
+ "loss": 0.4063,
+ "step": 202450
+ },
+ {
+ "epoch": 1.0447784295819338,
+ "grad_norm": 24895.77734375,
+ "learning_rate": 2.678247244545301e-06,
+ "loss": 0.3968,
+ "step": 202500
+ },
+ {
+ "epoch": 1.0450363995645466,
+ "grad_norm": 22442.416015625,
+ "learning_rate": 2.6664645014290833e-06,
+ "loss": 0.4009,
+ "step": 202550
+ },
+ {
+ "epoch": 1.0452943695471595,
+ "grad_norm": 24647.232421875,
+ "learning_rate": 2.654707024037556e-06,
+ "loss": 0.3984,
+ "step": 202600
+ },
+ {
+ "epoch": 1.0455523395297723,
+ "grad_norm": 24156.189453125,
+ "learning_rate": 2.6429748186466265e-06,
+ "loss": 0.3983,
+ "step": 202650
+ },
+ {
+ "epoch": 1.0458103095123852,
+ "grad_norm": 24131.658203125,
+ "learning_rate": 2.6312678915187185e-06,
+ "loss": 0.3941,
+ "step": 202700
+ },
+ {
+ "epoch": 1.046068279494998,
+ "grad_norm": 24890.5625,
+ "learning_rate": 2.6195862489027833e-06,
+ "loss": 0.3936,
+ "step": 202750
+ },
+ {
+ "epoch": 1.0463262494776109,
+ "grad_norm": 26486.58203125,
+ "learning_rate": 2.607929897034228e-06,
+ "loss": 0.4073,
+ "step": 202800
+ },
+ {
+ "epoch": 1.0465842194602235,
+ "grad_norm": 24554.09375,
+ "learning_rate": 2.5962988421350033e-06,
+ "loss": 0.3985,
+ "step": 202850
+ },
+ {
+ "epoch": 1.0468421894428364,
+ "grad_norm": 24964.349609375,
+ "learning_rate": 2.584693090413537e-06,
+ "loss": 0.3974,
+ "step": 202900
+ },
+ {
+ "epoch": 1.0471001594254492,
+ "grad_norm": 21256.87890625,
+ "learning_rate": 2.5731126480647516e-06,
+ "loss": 0.3969,
+ "step": 202950
+ },
+ {
+ "epoch": 1.047358129408062,
+ "grad_norm": 23721.197265625,
+ "learning_rate": 2.5615575212700804e-06,
+ "loss": 0.4039,
+ "step": 203000
+ },
+ {
+ "epoch": 1.047616099390675,
+ "grad_norm": 25096.4609375,
+ "learning_rate": 2.550027716197395e-06,
+ "loss": 0.3953,
+ "step": 203050
+ },
+ {
+ "epoch": 1.0478740693732878,
+ "grad_norm": 22199.11328125,
+ "learning_rate": 2.5385232390011114e-06,
+ "loss": 0.3979,
+ "step": 203100
+ },
+ {
+ "epoch": 1.0481320393559006,
+ "grad_norm": 24967.4609375,
+ "learning_rate": 2.527044095822084e-06,
+ "loss": 0.4023,
+ "step": 203150
+ },
+ {
+ "epoch": 1.0483900093385135,
+ "grad_norm": 28301.302734375,
+ "learning_rate": 2.5155902927876564e-06,
+ "loss": 0.4047,
+ "step": 203200
+ },
+ {
+ "epoch": 1.0486479793211263,
+ "grad_norm": 22268.037109375,
+ "learning_rate": 2.504161836011648e-06,
+ "loss": 0.4032,
+ "step": 203250
+ },
+ {
+ "epoch": 1.048905949303739,
+ "grad_norm": 28254.658203125,
+ "learning_rate": 2.4927587315943414e-06,
+ "loss": 0.3915,
+ "step": 203300
+ },
+ {
+ "epoch": 1.0491639192863518,
+ "grad_norm": 24471.462890625,
+ "learning_rate": 2.4813809856225112e-06,
+ "loss": 0.3986,
+ "step": 203350
+ },
+ {
+ "epoch": 1.0494218892689646,
+ "grad_norm": 24208.7578125,
+ "learning_rate": 2.470028604169361e-06,
+ "loss": 0.3969,
+ "step": 203400
+ },
+ {
+ "epoch": 1.0496798592515775,
+ "grad_norm": 23962.025390625,
+ "learning_rate": 2.4587015932945824e-06,
+ "loss": 0.3992,
+ "step": 203450
+ },
+ {
+ "epoch": 1.0499378292341903,
+ "grad_norm": 24777.421875,
+ "learning_rate": 2.4473999590443054e-06,
+ "loss": 0.4042,
+ "step": 203500
+ },
+ {
+ "epoch": 1.0501957992168032,
+ "grad_norm": 26705.40234375,
+ "learning_rate": 2.4361237074511323e-06,
+ "loss": 0.3985,
+ "step": 203550
+ },
+ {
+ "epoch": 1.050453769199416,
+ "grad_norm": 22508.51171875,
+ "learning_rate": 2.424872844534093e-06,
+ "loss": 0.3967,
+ "step": 203600
+ },
+ {
+ "epoch": 1.0507117391820289,
+ "grad_norm": 24678.62109375,
+ "learning_rate": 2.4136473762987057e-06,
+ "loss": 0.4002,
+ "step": 203650
+ },
+ {
+ "epoch": 1.0509697091646415,
+ "grad_norm": 24190.259765625,
+ "learning_rate": 2.402447308736883e-06,
+ "loss": 0.4002,
+ "step": 203700
+ },
+ {
+ "epoch": 1.0512276791472543,
+ "grad_norm": 27986.912109375,
+ "learning_rate": 2.391272647827014e-06,
+ "loss": 0.406,
+ "step": 203750
+ },
+ {
+ "epoch": 1.0514856491298672,
+ "grad_norm": 23664.740234375,
+ "learning_rate": 2.3801233995339236e-06,
+ "loss": 0.3988,
+ "step": 203800
+ },
+ {
+ "epoch": 1.05174361911248,
+ "grad_norm": 32503.17578125,
+ "learning_rate": 2.368999569808844e-06,
+ "loss": 0.3996,
+ "step": 203850
+ },
+ {
+ "epoch": 1.052001589095093,
+ "grad_norm": 24140.591796875,
+ "learning_rate": 2.3579011645894933e-06,
+ "loss": 0.4021,
+ "step": 203900
+ },
+ {
+ "epoch": 1.0522595590777057,
+ "grad_norm": 24920.033203125,
+ "learning_rate": 2.3468281897999487e-06,
+ "loss": 0.4038,
+ "step": 203950
+ },
+ {
+ "epoch": 1.0525175290603186,
+ "grad_norm": 20836.1796875,
+ "learning_rate": 2.335780651350772e-06,
+ "loss": 0.3929,
+ "step": 204000
+ },
+ {
+ "epoch": 1.0527754990429314,
+ "grad_norm": 22305.021484375,
+ "learning_rate": 2.324758555138923e-06,
+ "loss": 0.3963,
+ "step": 204050
+ },
+ {
+ "epoch": 1.0530334690255443,
+ "grad_norm": 22536.13671875,
+ "learning_rate": 2.3137619070477788e-06,
+ "loss": 0.3923,
+ "step": 204100
+ },
+ {
+ "epoch": 1.053291439008157,
+ "grad_norm": 23319.326171875,
+ "learning_rate": 2.3027907129471395e-06,
+ "loss": 0.4034,
+ "step": 204150
+ },
+ {
+ "epoch": 1.0535494089907698,
+ "grad_norm": 25774.677734375,
+ "learning_rate": 2.2918449786932085e-06,
+ "loss": 0.4015,
+ "step": 204200
+ },
+ {
+ "epoch": 1.0538073789733826,
+ "grad_norm": 23130.119140625,
+ "learning_rate": 2.280924710128618e-06,
+ "loss": 0.3971,
+ "step": 204250
+ },
+ {
+ "epoch": 1.0540653489559955,
+ "grad_norm": 23122.1875,
+ "learning_rate": 2.270029913082394e-06,
+ "loss": 0.3969,
+ "step": 204300
+ },
+ {
+ "epoch": 1.0543233189386083,
+ "grad_norm": 21518.763671875,
+ "learning_rate": 2.2591605933699632e-06,
+ "loss": 0.3992,
+ "step": 204350
+ },
+ {
+ "epoch": 1.0545812889212212,
+ "grad_norm": 25077.322265625,
+ "learning_rate": 2.248316756793156e-06,
+ "loss": 0.405,
+ "step": 204400
+ },
+ {
+ "epoch": 1.054839258903834,
+ "grad_norm": 23907.869140625,
+ "learning_rate": 2.237498409140215e-06,
+ "loss": 0.4009,
+ "step": 204450
+ },
+ {
+ "epoch": 1.0550972288864469,
+ "grad_norm": 22796.865234375,
+ "learning_rate": 2.2267055561857484e-06,
+ "loss": 0.4044,
+ "step": 204500
+ },
+ {
+ "epoch": 1.0553551988690595,
+ "grad_norm": 33471.05859375,
+ "learning_rate": 2.2159382036907927e-06,
+ "loss": 0.4021,
+ "step": 204550
+ },
+ {
+ "epoch": 1.0556131688516723,
+ "grad_norm": 23975.6640625,
+ "learning_rate": 2.2051963574027225e-06,
+ "loss": 0.3922,
+ "step": 204600
+ },
+ {
+ "epoch": 1.0558711388342852,
+ "grad_norm": 24563.220703125,
+ "learning_rate": 2.194480023055351e-06,
+ "loss": 0.3952,
+ "step": 204650
+ },
+ {
+ "epoch": 1.056129108816898,
+ "grad_norm": 24479.20703125,
+ "learning_rate": 2.1837892063688525e-06,
+ "loss": 0.4005,
+ "step": 204700
+ },
+ {
+ "epoch": 1.0563870787995109,
+ "grad_norm": 24895.6640625,
+ "learning_rate": 2.173123913049757e-06,
+ "loss": 0.3985,
+ "step": 204750
+ },
+ {
+ "epoch": 1.0566450487821237,
+ "grad_norm": 25606.34765625,
+ "learning_rate": 2.1624841487910052e-06,
+ "loss": 0.4019,
+ "step": 204800
+ },
+ {
+ "epoch": 1.0569030187647366,
+ "grad_norm": 23026.8828125,
+ "learning_rate": 2.151869919271904e-06,
+ "loss": 0.4023,
+ "step": 204850
+ },
+ {
+ "epoch": 1.0571609887473494,
+ "grad_norm": 24365.9609375,
+ "learning_rate": 2.1412812301581097e-06,
+ "loss": 0.3992,
+ "step": 204900
+ },
+ {
+ "epoch": 1.0574189587299623,
+ "grad_norm": 25374.990234375,
+ "learning_rate": 2.130718087101663e-06,
+ "loss": 0.4009,
+ "step": 204950
+ },
+ {
+ "epoch": 1.057676928712575,
+ "grad_norm": 23697.388671875,
+ "learning_rate": 2.1201804957409697e-06,
+ "loss": 0.4042,
+ "step": 205000
+ },
+ {
+ "epoch": 1.057676928712575,
+ "eval_loss": 0.386392205953598,
+ "eval_runtime": 3213.2768,
+ "eval_samples_per_second": 965.096,
+ "eval_steps_per_second": 1.885,
+ "step": 205000
+ },
+ {
+ "epoch": 1.0579348986951878,
+ "grad_norm": 23768.669921875,
+ "learning_rate": 2.109668461700781e-06,
+ "loss": 0.4058,
+ "step": 205050
+ },
+ {
+ "epoch": 1.0581928686778006,
+ "grad_norm": 24203.693359375,
+ "learning_rate": 2.099181990592236e-06,
+ "loss": 0.3971,
+ "step": 205100
+ },
+ {
+ "epoch": 1.0584508386604135,
+ "grad_norm": 26739.72265625,
+ "learning_rate": 2.088721088012796e-06,
+ "loss": 0.4008,
+ "step": 205150
+ },
+ {
+ "epoch": 1.0587088086430263,
+ "grad_norm": 25664.5,
+ "learning_rate": 2.078285759546289e-06,
+ "loss": 0.4001,
+ "step": 205200
+ },
+ {
+ "epoch": 1.0589667786256391,
+ "grad_norm": 22887.986328125,
+ "learning_rate": 2.067876010762898e-06,
+ "loss": 0.3946,
+ "step": 205250
+ },
+ {
+ "epoch": 1.059224748608252,
+ "grad_norm": 24908.890625,
+ "learning_rate": 2.057491847219134e-06,
+ "loss": 0.3997,
+ "step": 205300
+ },
+ {
+ "epoch": 1.0594827185908648,
+ "grad_norm": 26352.986328125,
+ "learning_rate": 2.0471332744578853e-06,
+ "loss": 0.4022,
+ "step": 205350
+ },
+ {
+ "epoch": 1.0597406885734777,
+ "grad_norm": 23545.6640625,
+ "learning_rate": 2.0368002980083235e-06,
+ "loss": 0.3976,
+ "step": 205400
+ },
+ {
+ "epoch": 1.0599986585560903,
+ "grad_norm": 24206.896484375,
+ "learning_rate": 2.02649292338602e-06,
+ "loss": 0.3934,
+ "step": 205450
+ },
+ {
+ "epoch": 1.0602566285387032,
+ "grad_norm": 22331.580078125,
+ "learning_rate": 2.0162111560928345e-06,
+ "loss": 0.3969,
+ "step": 205500
+ },
+ {
+ "epoch": 1.060514598521316,
+ "grad_norm": 24358.099609375,
+ "learning_rate": 2.0059550016169827e-06,
+ "loss": 0.3934,
+ "step": 205550
+ },
+ {
+ "epoch": 1.0607725685039289,
+ "grad_norm": 23970.693359375,
+ "learning_rate": 1.9957244654330133e-06,
+ "loss": 0.4012,
+ "step": 205600
+ },
+ {
+ "epoch": 1.0610305384865417,
+ "grad_norm": 23980.03125,
+ "learning_rate": 1.985519553001758e-06,
+ "loss": 0.3979,
+ "step": 205650
+ },
+ {
+ "epoch": 1.0612885084691546,
+ "grad_norm": 25418.708984375,
+ "learning_rate": 1.9753402697704313e-06,
+ "loss": 0.3988,
+ "step": 205700
+ },
+ {
+ "epoch": 1.0615464784517674,
+ "grad_norm": 22902.38671875,
+ "learning_rate": 1.965186621172521e-06,
+ "loss": 0.393,
+ "step": 205750
+ },
+ {
+ "epoch": 1.0618044484343803,
+ "grad_norm": 24547.4375,
+ "learning_rate": 1.9550586126278525e-06,
+ "loss": 0.4,
+ "step": 205800
+ },
+ {
+ "epoch": 1.062062418416993,
+ "grad_norm": 24038.619140625,
+ "learning_rate": 1.9449562495425623e-06,
+ "loss": 0.3995,
+ "step": 205850
+ },
+ {
+ "epoch": 1.0623203883996057,
+ "grad_norm": 22873.3515625,
+ "learning_rate": 1.9348795373090977e-06,
+ "loss": 0.4028,
+ "step": 205900
+ },
+ {
+ "epoch": 1.0625783583822186,
+ "grad_norm": 22140.7890625,
+ "learning_rate": 1.9248284813061957e-06,
+ "loss": 0.4036,
+ "step": 205950
+ },
+ {
+ "epoch": 1.0628363283648314,
+ "grad_norm": 23617.9140625,
+ "learning_rate": 1.914803086898942e-06,
+ "loss": 0.4005,
+ "step": 206000
+ },
+ {
+ "epoch": 1.0630942983474443,
+ "grad_norm": 22808.267578125,
+ "learning_rate": 1.9048033594386838e-06,
+ "loss": 0.3989,
+ "step": 206050
+ },
+ {
+ "epoch": 1.0633522683300571,
+ "grad_norm": 23189.298828125,
+ "learning_rate": 1.8948293042630794e-06,
+ "loss": 0.3982,
+ "step": 206100
+ },
+ {
+ "epoch": 1.06361023831267,
+ "grad_norm": 23994.052734375,
+ "learning_rate": 1.884880926696092e-06,
+ "loss": 0.4023,
+ "step": 206150
+ },
+ {
+ "epoch": 1.0638682082952828,
+ "grad_norm": 25587.49609375,
+ "learning_rate": 1.8749582320479687e-06,
+ "loss": 0.4056,
+ "step": 206200
+ },
+ {
+ "epoch": 1.0641261782778957,
+ "grad_norm": 22929.3984375,
+ "learning_rate": 1.865061225615261e-06,
+ "loss": 0.3987,
+ "step": 206250
+ },
+ {
+ "epoch": 1.0643841482605083,
+ "grad_norm": 24747.65234375,
+ "learning_rate": 1.8551899126807825e-06,
+ "loss": 0.3959,
+ "step": 206300
+ },
+ {
+ "epoch": 1.0646421182431212,
+ "grad_norm": 24856.77734375,
+ "learning_rate": 1.8453442985136682e-06,
+ "loss": 0.3989,
+ "step": 206350
+ },
+ {
+ "epoch": 1.064900088225734,
+ "grad_norm": 28136.97265625,
+ "learning_rate": 1.835524388369303e-06,
+ "loss": 0.396,
+ "step": 206400
+ },
+ {
+ "epoch": 1.0651580582083469,
+ "grad_norm": 25035.076171875,
+ "learning_rate": 1.8257301874893607e-06,
+ "loss": 0.4,
+ "step": 206450
+ },
+ {
+ "epoch": 1.0654160281909597,
+ "grad_norm": 23690.525390625,
+ "learning_rate": 1.8159617011018205e-06,
+ "loss": 0.3982,
+ "step": 206500
+ },
+ {
+ "epoch": 1.0656739981735726,
+ "grad_norm": 25419.333984375,
+ "learning_rate": 1.8062189344208835e-06,
+ "loss": 0.401,
+ "step": 206550
+ },
+ {
+ "epoch": 1.0659319681561854,
+ "grad_norm": 24045.5703125,
+ "learning_rate": 1.7965018926470622e-06,
+ "loss": 0.3974,
+ "step": 206600
+ },
+ {
+ "epoch": 1.0661899381387983,
+ "grad_norm": 23030.5625,
+ "learning_rate": 1.7868105809671298e-06,
+ "loss": 0.4049,
+ "step": 206650
+ },
+ {
+ "epoch": 1.066447908121411,
+ "grad_norm": 26036.546875,
+ "learning_rate": 1.7771450045541149e-06,
+ "loss": 0.3948,
+ "step": 206700
+ },
+ {
+ "epoch": 1.0667058781040237,
+ "grad_norm": 25025.001953125,
+ "learning_rate": 1.7675051685673127e-06,
+ "loss": 0.404,
+ "step": 206750
+ },
+ {
+ "epoch": 1.0669638480866366,
+ "grad_norm": 24296.775390625,
+ "learning_rate": 1.757891078152285e-06,
+ "loss": 0.3953,
+ "step": 206800
+ },
+ {
+ "epoch": 1.0672218180692494,
+ "grad_norm": 22506.177734375,
+ "learning_rate": 1.748302738440838e-06,
+ "loss": 0.4,
+ "step": 206850
+ },
+ {
+ "epoch": 1.0674797880518623,
+ "grad_norm": 26741.724609375,
+ "learning_rate": 1.738740154551055e-06,
+ "loss": 0.3971,
+ "step": 206900
+ },
+ {
+ "epoch": 1.0677377580344751,
+ "grad_norm": 24435.2421875,
+ "learning_rate": 1.7292033315872592e-06,
+ "loss": 0.3955,
+ "step": 206950
+ },
+ {
+ "epoch": 1.067995728017088,
+ "grad_norm": 21010.521484375,
+ "learning_rate": 1.7196922746400058e-06,
+ "loss": 0.3997,
+ "step": 207000
+ },
+ {
+ "epoch": 1.0682536979997008,
+ "grad_norm": 21215.384765625,
+ "learning_rate": 1.710206988786134e-06,
+ "loss": 0.3971,
+ "step": 207050
+ },
+ {
+ "epoch": 1.0685116679823137,
+ "grad_norm": 23713.73046875,
+ "learning_rate": 1.7007474790886823e-06,
+ "loss": 0.4022,
+ "step": 207100
+ },
+ {
+ "epoch": 1.0687696379649263,
+ "grad_norm": 26804.658203125,
+ "learning_rate": 1.691313750596979e-06,
+ "loss": 0.4028,
+ "step": 207150
+ },
+ {
+ "epoch": 1.0690276079475391,
+ "grad_norm": 24873.318359375,
+ "learning_rate": 1.68190580834654e-06,
+ "loss": 0.3995,
+ "step": 207200
+ },
+ {
+ "epoch": 1.069285577930152,
+ "grad_norm": 23567.91796875,
+ "learning_rate": 1.6725236573591596e-06,
+ "loss": 0.3988,
+ "step": 207250
+ },
+ {
+ "epoch": 1.0695435479127648,
+ "grad_norm": 23659.513671875,
+ "learning_rate": 1.6631673026428484e-06,
+ "loss": 0.4036,
+ "step": 207300
+ },
+ {
+ "epoch": 1.0698015178953777,
+ "grad_norm": 22014.623046875,
+ "learning_rate": 1.6538367491918339e-06,
+ "loss": 0.4003,
+ "step": 207350
+ },
+ {
+ "epoch": 1.0700594878779905,
+ "grad_norm": 26985.240234375,
+ "learning_rate": 1.6445320019865984e-06,
+ "loss": 0.3949,
+ "step": 207400
+ },
+ {
+ "epoch": 1.0703174578606034,
+ "grad_norm": 25032.328125,
+ "learning_rate": 1.635253065993836e-06,
+ "loss": 0.4072,
+ "step": 207450
+ },
+ {
+ "epoch": 1.0705754278432162,
+ "grad_norm": 23999.62890625,
+ "learning_rate": 1.6259999461664566e-06,
+ "loss": 0.4018,
+ "step": 207500
+ },
+ {
+ "epoch": 1.070833397825829,
+ "grad_norm": 24842.439453125,
+ "learning_rate": 1.616772647443593e-06,
+ "loss": 0.3992,
+ "step": 207550
+ },
+ {
+ "epoch": 1.0710913678084417,
+ "grad_norm": 26740.083984375,
+ "learning_rate": 1.6075711747506106e-06,
+ "loss": 0.3954,
+ "step": 207600
+ },
+ {
+ "epoch": 1.0713493377910546,
+ "grad_norm": 25067.95703125,
+ "learning_rate": 1.598395532999064e-06,
+ "loss": 0.4008,
+ "step": 207650
+ },
+ {
+ "epoch": 1.0716073077736674,
+ "grad_norm": 22218.814453125,
+ "learning_rate": 1.5892457270867467e-06,
+ "loss": 0.4005,
+ "step": 207700
+ },
+ {
+ "epoch": 1.0718652777562803,
+ "grad_norm": 25727.36328125,
+ "learning_rate": 1.5801217618976294e-06,
+ "loss": 0.402,
+ "step": 207750
+ },
+ {
+ "epoch": 1.0721232477388931,
+ "grad_norm": 24692.19921875,
+ "learning_rate": 1.5710236423019275e-06,
+ "loss": 0.4035,
+ "step": 207800
+ },
+ {
+ "epoch": 1.072381217721506,
+ "grad_norm": 25514.009765625,
+ "learning_rate": 1.5619513731560342e-06,
+ "loss": 0.3964,
+ "step": 207850
+ },
+ {
+ "epoch": 1.0726391877041188,
+ "grad_norm": 24503.408203125,
+ "learning_rate": 1.5529049593025425e-06,
+ "loss": 0.4036,
+ "step": 207900
+ },
+ {
+ "epoch": 1.0728971576867317,
+ "grad_norm": 27466.498046875,
+ "learning_rate": 1.5438844055702728e-06,
+ "loss": 0.4019,
+ "step": 207950
+ },
+ {
+ "epoch": 1.0731551276693443,
+ "grad_norm": 24170.1171875,
+ "learning_rate": 1.5348897167742015e-06,
+ "loss": 0.4005,
+ "step": 208000
+ },
+ {
+ "epoch": 1.0734130976519571,
+ "grad_norm": 24094.044921875,
+ "learning_rate": 1.525920897715527e-06,
+ "loss": 0.402,
+ "step": 208050
+ },
+ {
+ "epoch": 1.07367106763457,
+ "grad_norm": 22958.8125,
+ "learning_rate": 1.5169779531816365e-06,
+ "loss": 0.4041,
+ "step": 208100
+ },
+ {
+ "epoch": 1.0739290376171828,
+ "grad_norm": 24056.849609375,
+ "learning_rate": 1.508060887946089e-06,
+ "loss": 0.4017,
+ "step": 208150
+ },
+ {
+ "epoch": 1.0741870075997957,
+ "grad_norm": 22397.435546875,
+ "learning_rate": 1.499169706768655e-06,
+ "loss": 0.4036,
+ "step": 208200
+ },
+ {
+ "epoch": 1.0744449775824085,
+ "grad_norm": 22317.74609375,
+ "learning_rate": 1.4903044143952604e-06,
+ "loss": 0.3992,
+ "step": 208250
+ },
+ {
+ "epoch": 1.0747029475650214,
+ "grad_norm": 22923.57421875,
+ "learning_rate": 1.4814650155580367e-06,
+ "loss": 0.3953,
+ "step": 208300
+ },
+ {
+ "epoch": 1.0749609175476342,
+ "grad_norm": 24276.650390625,
+ "learning_rate": 1.4726515149752818e-06,
+ "loss": 0.4052,
+ "step": 208350
+ },
+ {
+ "epoch": 1.075218887530247,
+ "grad_norm": 27791.369140625,
+ "learning_rate": 1.4638639173514712e-06,
+ "loss": 0.4027,
+ "step": 208400
+ },
+ {
+ "epoch": 1.0754768575128597,
+ "grad_norm": 22683.73046875,
+ "learning_rate": 1.4551022273772585e-06,
+ "loss": 0.4036,
+ "step": 208450
+ },
+ {
+ "epoch": 1.0757348274954726,
+ "grad_norm": 26474.087890625,
+ "learning_rate": 1.4463664497294527e-06,
+ "loss": 0.3966,
+ "step": 208500
+ },
+ {
+ "epoch": 1.0759927974780854,
+ "grad_norm": 25933.25390625,
+ "learning_rate": 1.4376565890710514e-06,
+ "loss": 0.4042,
+ "step": 208550
+ },
+ {
+ "epoch": 1.0762507674606983,
+ "grad_norm": 23373.078125,
+ "learning_rate": 1.4289726500512134e-06,
+ "loss": 0.3971,
+ "step": 208600
+ },
+ {
+ "epoch": 1.076508737443311,
+ "grad_norm": 23282.916015625,
+ "learning_rate": 1.4203146373052423e-06,
+ "loss": 0.4038,
+ "step": 208650
+ },
+ {
+ "epoch": 1.076766707425924,
+ "grad_norm": 26307.12109375,
+ "learning_rate": 1.4116825554546353e-06,
+ "loss": 0.3959,
+ "step": 208700
+ },
+ {
+ "epoch": 1.0770246774085368,
+ "grad_norm": 24472.884765625,
+ "learning_rate": 1.4030764091070237e-06,
+ "loss": 0.3954,
+ "step": 208750
+ },
+ {
+ "epoch": 1.0772826473911497,
+ "grad_norm": 25234.9375,
+ "learning_rate": 1.394496202856188e-06,
+ "loss": 0.4003,
+ "step": 208800
+ },
+ {
+ "epoch": 1.0775406173737623,
+ "grad_norm": 31742.607421875,
+ "learning_rate": 1.385941941282104e-06,
+ "loss": 0.3963,
+ "step": 208850
+ },
+ {
+ "epoch": 1.0777985873563751,
+ "grad_norm": 22577.599609375,
+ "learning_rate": 1.3774136289508466e-06,
+ "loss": 0.4003,
+ "step": 208900
+ },
+ {
+ "epoch": 1.078056557338988,
+ "grad_norm": 24765.111328125,
+ "learning_rate": 1.3689112704146745e-06,
+ "loss": 0.3965,
+ "step": 208950
+ },
+ {
+ "epoch": 1.0783145273216008,
+ "grad_norm": 24935.205078125,
+ "learning_rate": 1.3604348702119795e-06,
+ "loss": 0.4001,
+ "step": 209000
+ },
+ {
+ "epoch": 1.0785724973042137,
+ "grad_norm": 25825.361328125,
+ "learning_rate": 1.3519844328673037e-06,
+ "loss": 0.4014,
+ "step": 209050
+ },
+ {
+ "epoch": 1.0788304672868265,
+ "grad_norm": 23713.068359375,
+ "learning_rate": 1.343559962891322e-06,
+ "loss": 0.3961,
+ "step": 209100
+ },
+ {
+ "epoch": 1.0790884372694394,
+ "grad_norm": 24578.435546875,
+ "learning_rate": 1.3351614647808542e-06,
+ "loss": 0.3983,
+ "step": 209150
+ },
+ {
+ "epoch": 1.0793464072520522,
+ "grad_norm": 22323.19140625,
+ "learning_rate": 1.3267889430188585e-06,
+ "loss": 0.4005,
+ "step": 209200
+ },
+ {
+ "epoch": 1.079604377234665,
+ "grad_norm": 22834.76953125,
+ "learning_rate": 1.3184424020744212e-06,
+ "loss": 0.3964,
+ "step": 209250
+ },
+ {
+ "epoch": 1.0798623472172777,
+ "grad_norm": 22097.615234375,
+ "learning_rate": 1.3101218464027676e-06,
+ "loss": 0.3932,
+ "step": 209300
+ },
+ {
+ "epoch": 1.0801203171998905,
+ "grad_norm": 23564.677734375,
+ "learning_rate": 1.3018272804452503e-06,
+ "loss": 0.3996,
+ "step": 209350
+ },
+ {
+ "epoch": 1.0803782871825034,
+ "grad_norm": 25264.150390625,
+ "learning_rate": 1.2935587086293443e-06,
+ "loss": 0.3975,
+ "step": 209400
+ },
+ {
+ "epoch": 1.0806362571651162,
+ "grad_norm": 22622.1015625,
+ "learning_rate": 1.2853161353686526e-06,
+ "loss": 0.4028,
+ "step": 209450
+ },
+ {
+ "epoch": 1.080894227147729,
+ "grad_norm": 25768.478515625,
+ "learning_rate": 1.2770995650629058e-06,
+ "loss": 0.395,
+ "step": 209500
+ },
+ {
+ "epoch": 1.081152197130342,
+ "grad_norm": 21997.23828125,
+ "learning_rate": 1.2689090020979455e-06,
+ "loss": 0.3997,
+ "step": 209550
+ },
+ {
+ "epoch": 1.0814101671129548,
+ "grad_norm": 24653.796875,
+ "learning_rate": 1.26074445084573e-06,
+ "loss": 0.3992,
+ "step": 209600
+ },
+ {
+ "epoch": 1.0816681370955676,
+ "grad_norm": 25631.18359375,
+ "learning_rate": 1.252605915664362e-06,
+ "loss": 0.4006,
+ "step": 209650
+ },
+ {
+ "epoch": 1.0819261070781803,
+ "grad_norm": 25373.162109375,
+ "learning_rate": 1.2444934008980058e-06,
+ "loss": 0.4047,
+ "step": 209700
+ },
+ {
+ "epoch": 1.0821840770607931,
+ "grad_norm": 23108.03125,
+ "learning_rate": 1.2364069108769804e-06,
+ "loss": 0.3994,
+ "step": 209750
+ },
+ {
+ "epoch": 1.082442047043406,
+ "grad_norm": 23362.0546875,
+ "learning_rate": 1.2283464499177e-06,
+ "loss": 0.4059,
+ "step": 209800
+ },
+ {
+ "epoch": 1.0827000170260188,
+ "grad_norm": 19350.4609375,
+ "learning_rate": 1.2203120223226727e-06,
+ "loss": 0.4012,
+ "step": 209850
+ },
+ {
+ "epoch": 1.0829579870086317,
+ "grad_norm": 24877.921875,
+ "learning_rate": 1.2123036323805237e-06,
+ "loss": 0.3989,
+ "step": 209900
+ },
+ {
+ "epoch": 1.0832159569912445,
+ "grad_norm": 25544.15625,
+ "learning_rate": 1.2043212843659724e-06,
+ "loss": 0.3963,
+ "step": 209950
+ },
+ {
+ "epoch": 1.0834739269738574,
+ "grad_norm": 24271.380859375,
+ "learning_rate": 1.1963649825398494e-06,
+ "loss": 0.4026,
+ "step": 210000
+ },
+ {
+ "epoch": 1.0834739269738574,
+ "eval_loss": 0.3863469064235687,
+ "eval_runtime": 3245.2698,
+ "eval_samples_per_second": 955.582,
+ "eval_steps_per_second": 1.866,
+ "step": 210000
+ }
+ ],
+ "logging_steps": 50,
+ "max_steps": 225000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 2,
+ "save_steps": 5000,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 5,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 0
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.825838697690079e+17,
+ "train_batch_size": 128,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/training_args.bin b/pretrain_glome_nano_model_tiny/checkpoint-210000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..afd49ab13e1adc210b7ee9755ab768f1bc6434dc
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c41aa9c6023a3a9650c2ca731b440abde601b316b41906bb1dab8748c3c13ed
+size 5304
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-210000/vocab.json b/pretrain_glome_nano_model_tiny/checkpoint-210000/vocab.json
new file mode 100644
index 0000000000000000000000000000000000000000..54045330cccae0d703647b73183868a84aa6c91f
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-210000/vocab.json
@@ -0,0 +1 @@
+{"A":0,"R":1,"N":2,"D":3,"C":4,"Q":5,"E":6,"G":7,"H":8,"I":9,"L":10,"K":11,"M":12,"F":13,"P":14,"S":15,"T":16,"W":17,"Y":18,"V":19,"X":20,"B":21,"U":22,"Z":23,"O":24,".":25,"-":26,"":27,"":28,"":29,"":30,"":31}
\ No newline at end of file
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/config.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b4e7a54fdf8bdda8d2a7ac6356523b75cecb2eb5
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/config.json
@@ -0,0 +1,44 @@
+{
+ "architectures": [
+ "GloMeModelForMaskedLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.1,
+ "bos_token_id": 28,
+ "classifier_activation": "gelu",
+ "classifier_bias": false,
+ "classifier_dropout": 0.1,
+ "classifier_pooling": "cls",
+ "cls_token_id": 28,
+ "compress_block_size": 16,
+ "compress_block_sliding_stride": 16,
+ "decoder_bias": true,
+ "dice_weight": 0.0,
+ "embedding_dropout": 0.1,
+ "eos_token_id": 29,
+ "hidden_activation": "gelu",
+ "hidden_size": 320,
+ "inner_rank": 32,
+ "intermediate_size": 1280,
+ "kv_heads": 10,
+ "mask_token_id": 31,
+ "mlp_bias": false,
+ "mlp_dropout": 0.1,
+ "model_size": "tiny",
+ "model_type": "glome",
+ "norm_bias": false,
+ "norm_eps": 1e-05,
+ "num_attention_heads": 20,
+ "num_hidden_layers": 6,
+ "num_selected_blocks": 8,
+ "num_slots": 64,
+ "pad_token_id": 30,
+ "reference_compile": null,
+ "selection_block_size": 16,
+ "sep_token_id": 29,
+ "sliding_window_size": 0,
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.3",
+ "unk_token_id": 27,
+ "vocab_size": 36
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/merges.txt b/pretrain_glome_nano_model_tiny/checkpoint-215000/merges.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5e7f1fd94996c8e2b65adea828af1b398eace61f
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/merges.txt
@@ -0,0 +1 @@
+#version: 0.2
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/model.safetensors b/pretrain_glome_nano_model_tiny/checkpoint-215000/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..af986ededba173b18731c67bb33527bc0c72d958
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:725530a204fe419bdd8e2a4edec8a015eba100ba76e0afb7a0d980312b63ddcd
+size 61429032
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/optimizer.pt b/pretrain_glome_nano_model_tiny/checkpoint-215000/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7852f4ac1775b4a124f8b01b6dec472823c8360e
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52fa15f82ebc4b95c156cc950e35d3872a0ad0324045a251738e568568579b51
+size 122968954
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/rng_state.pth b/pretrain_glome_nano_model_tiny/checkpoint-215000/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7cf378f6781eee516e16f9aaed3717745152038c
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:604a00dce62859f6b063100785ea5aed25010c438c435155b45a6eccf452b0a9
+size 14244
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/scaler.pt b/pretrain_glome_nano_model_tiny/checkpoint-215000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..238aeec6738a4c5b7712e73dd269e153ea9d891e
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
+size 988
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/scheduler.pt b/pretrain_glome_nano_model_tiny/checkpoint-215000/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e7eae53f649c018ceae4f7a20dad67f78793923
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa362a246a873fcb06a969bc3dc63f8cef55ddbb99c9ce75ae91e3bd8e19ed28
+size 1064
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/special_tokens_map.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..14f7c9ed7b0bde6d23ee7b6a24ac2996789d1a0b
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/special_tokens_map.json
@@ -0,0 +1,51 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "cls_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sep_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ece9b8e6fa70a006c5c10c47e30c9cff4ff95f0
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer.json
@@ -0,0 +1,123 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 27,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 28,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 29,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 30,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": true,
+ "special": true
+ },
+ {
+ "id": 31,
+ "content": "",
+ "single_word": false,
+ "lstrip": true,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": null,
+ "pre_tokenizer": {
+ "type": "ByteLevel",
+ "add_prefix_space": false,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "post_processor": {
+ "type": "RobertaProcessing",
+ "sep": [
+ "",
+ 29
+ ],
+ "cls": [
+ "",
+ 28
+ ],
+ "trim_offsets": true,
+ "add_prefix_space": false
+ },
+ "decoder": {
+ "type": "ByteLevel",
+ "add_prefix_space": true,
+ "trim_offsets": true,
+ "use_regex": true
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": null,
+ "continuing_subword_prefix": "",
+ "end_of_word_suffix": "",
+ "fuse_unk": false,
+ "byte_fallback": false,
+ "ignore_merges": false,
+ "vocab": {
+ "A": 0,
+ "R": 1,
+ "N": 2,
+ "D": 3,
+ "C": 4,
+ "Q": 5,
+ "E": 6,
+ "G": 7,
+ "H": 8,
+ "I": 9,
+ "L": 10,
+ "K": 11,
+ "M": 12,
+ "F": 13,
+ "P": 14,
+ "S": 15,
+ "T": 16,
+ "W": 17,
+ "Y": 18,
+ "V": 19,
+ "X": 20,
+ "B": 21,
+ "U": 22,
+ "Z": 23,
+ "O": 24,
+ ".": 25,
+ "-": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31
+ },
+ "merges": []
+ }
+}
\ No newline at end of file
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer_config.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c77f0533c6d3bd60b0a23b8adfacc351923d671
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/tokenizer_config.json
@@ -0,0 +1,58 @@
+{
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "27": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "28": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "29": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "30": {
+ "content": "",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "31": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "tokenizer_class": "RobertaTokenizer",
+ "trim_offsets": true,
+ "unk_token": ""
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/trainer_state.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..73974255fb8ff73559573fcbdd6aad8d1adc28b8
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/trainer_state.json
@@ -0,0 +1,30487 @@
+{
+ "best_global_step": null,
+ "best_metric": 0.3860665559768677,
+ "best_model_checkpoint": null,
+ "epoch": 1.1092709252351396,
+ "eval_steps": 5000,
+ "global_step": 215000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.0002579699826128232,
+ "grad_norm": 314643.03125,
+ "learning_rate": 4.9e-07,
+ "loss": 3.7018,
+ "step": 50
+ },
+ {
+ "epoch": 0.0005159399652256464,
+ "grad_norm": 286448.65625,
+ "learning_rate": 9.9e-07,
+ "loss": 3.6146,
+ "step": 100
+ },
+ {
+ "epoch": 0.0007739099478384695,
+ "grad_norm": 214582.28125,
+ "learning_rate": 1.4900000000000001e-06,
+ "loss": 3.4562,
+ "step": 150
+ },
+ {
+ "epoch": 0.0010318799304512927,
+ "grad_norm": 137193.9375,
+ "learning_rate": 1.99e-06,
+ "loss": 3.2558,
+ "step": 200
+ },
+ {
+ "epoch": 0.0012898499130641159,
+ "grad_norm": 82222.84375,
+ "learning_rate": 2.49e-06,
+ "loss": 3.0641,
+ "step": 250
+ },
+ {
+ "epoch": 0.001547819895676939,
+ "grad_norm": 56772.109375,
+ "learning_rate": 2.99e-06,
+ "loss": 2.9123,
+ "step": 300
+ },
+ {
+ "epoch": 0.0018057898782897622,
+ "grad_norm": 46737.6796875,
+ "learning_rate": 3.49e-06,
+ "loss": 2.8131,
+ "step": 350
+ },
+ {
+ "epoch": 0.0020637598609025854,
+ "grad_norm": 38769.04296875,
+ "learning_rate": 3.99e-06,
+ "loss": 2.7526,
+ "step": 400
+ },
+ {
+ "epoch": 0.0023217298435154084,
+ "grad_norm": 34701.5546875,
+ "learning_rate": 4.49e-06,
+ "loss": 2.7178,
+ "step": 450
+ },
+ {
+ "epoch": 0.0025796998261282318,
+ "grad_norm": 30332.826171875,
+ "learning_rate": 4.9900000000000005e-06,
+ "loss": 2.6967,
+ "step": 500
+ },
+ {
+ "epoch": 0.0028376698087410547,
+ "grad_norm": 27192.7421875,
+ "learning_rate": 5.49e-06,
+ "loss": 2.6798,
+ "step": 550
+ },
+ {
+ "epoch": 0.003095639791353878,
+ "grad_norm": 36959.33984375,
+ "learning_rate": 5.99e-06,
+ "loss": 2.668,
+ "step": 600
+ },
+ {
+ "epoch": 0.003353609773966701,
+ "grad_norm": 30939.365234375,
+ "learning_rate": 6.4900000000000005e-06,
+ "loss": 2.6572,
+ "step": 650
+ },
+ {
+ "epoch": 0.0036115797565795245,
+ "grad_norm": 26976.78125,
+ "learning_rate": 6.990000000000001e-06,
+ "loss": 2.6397,
+ "step": 700
+ },
+ {
+ "epoch": 0.0038695497391923474,
+ "grad_norm": 32308.447265625,
+ "learning_rate": 7.4899999999999994e-06,
+ "loss": 2.6021,
+ "step": 750
+ },
+ {
+ "epoch": 0.004127519721805171,
+ "grad_norm": 33838.8046875,
+ "learning_rate": 7.99e-06,
+ "loss": 2.5058,
+ "step": 800
+ },
+ {
+ "epoch": 0.004385489704417994,
+ "grad_norm": 49298.84765625,
+ "learning_rate": 8.49e-06,
+ "loss": 2.4095,
+ "step": 850
+ },
+ {
+ "epoch": 0.004643459687030817,
+ "grad_norm": 36241.56640625,
+ "learning_rate": 8.99e-06,
+ "loss": 2.2961,
+ "step": 900
+ },
+ {
+ "epoch": 0.004901429669643641,
+ "grad_norm": 37884.82421875,
+ "learning_rate": 9.49e-06,
+ "loss": 2.1526,
+ "step": 950
+ },
+ {
+ "epoch": 0.0051593996522564635,
+ "grad_norm": 36827.66796875,
+ "learning_rate": 9.990000000000001e-06,
+ "loss": 2.0338,
+ "step": 1000
+ },
+ {
+ "epoch": 0.0054173696348692865,
+ "grad_norm": 41398.421875,
+ "learning_rate": 1.049e-05,
+ "loss": 1.939,
+ "step": 1050
+ },
+ {
+ "epoch": 0.0056753396174821094,
+ "grad_norm": 42648.38671875,
+ "learning_rate": 1.099e-05,
+ "loss": 1.8325,
+ "step": 1100
+ },
+ {
+ "epoch": 0.005933309600094933,
+ "grad_norm": 40172.9453125,
+ "learning_rate": 1.149e-05,
+ "loss": 1.7643,
+ "step": 1150
+ },
+ {
+ "epoch": 0.006191279582707756,
+ "grad_norm": 35860.8515625,
+ "learning_rate": 1.199e-05,
+ "loss": 1.6742,
+ "step": 1200
+ },
+ {
+ "epoch": 0.006449249565320579,
+ "grad_norm": 44456.93359375,
+ "learning_rate": 1.249e-05,
+ "loss": 1.6026,
+ "step": 1250
+ },
+ {
+ "epoch": 0.006707219547933402,
+ "grad_norm": 36839.08984375,
+ "learning_rate": 1.299e-05,
+ "loss": 1.521,
+ "step": 1300
+ },
+ {
+ "epoch": 0.006965189530546226,
+ "grad_norm": 44026.68359375,
+ "learning_rate": 1.349e-05,
+ "loss": 1.4436,
+ "step": 1350
+ },
+ {
+ "epoch": 0.007223159513159049,
+ "grad_norm": 35557.578125,
+ "learning_rate": 1.399e-05,
+ "loss": 1.3773,
+ "step": 1400
+ },
+ {
+ "epoch": 0.007481129495771872,
+ "grad_norm": 38767.60546875,
+ "learning_rate": 1.449e-05,
+ "loss": 1.3023,
+ "step": 1450
+ },
+ {
+ "epoch": 0.007739099478384695,
+ "grad_norm": 36654.796875,
+ "learning_rate": 1.499e-05,
+ "loss": 1.2627,
+ "step": 1500
+ },
+ {
+ "epoch": 0.007997069460997519,
+ "grad_norm": 41690.328125,
+ "learning_rate": 1.5490000000000002e-05,
+ "loss": 1.2063,
+ "step": 1550
+ },
+ {
+ "epoch": 0.008255039443610342,
+ "grad_norm": 38743.59375,
+ "learning_rate": 1.599e-05,
+ "loss": 1.1626,
+ "step": 1600
+ },
+ {
+ "epoch": 0.008513009426223165,
+ "grad_norm": 41839.7890625,
+ "learning_rate": 1.649e-05,
+ "loss": 1.1225,
+ "step": 1650
+ },
+ {
+ "epoch": 0.008770979408835988,
+ "grad_norm": 42897.0703125,
+ "learning_rate": 1.699e-05,
+ "loss": 1.0864,
+ "step": 1700
+ },
+ {
+ "epoch": 0.00902894939144881,
+ "grad_norm": 37412.30859375,
+ "learning_rate": 1.749e-05,
+ "loss": 1.0613,
+ "step": 1750
+ },
+ {
+ "epoch": 0.009286919374061633,
+ "grad_norm": 37235.484375,
+ "learning_rate": 1.7990000000000002e-05,
+ "loss": 1.0354,
+ "step": 1800
+ },
+ {
+ "epoch": 0.009544889356674458,
+ "grad_norm": 39117.6328125,
+ "learning_rate": 1.849e-05,
+ "loss": 1.0059,
+ "step": 1850
+ },
+ {
+ "epoch": 0.009802859339287281,
+ "grad_norm": 37297.6875,
+ "learning_rate": 1.8990000000000003e-05,
+ "loss": 0.9795,
+ "step": 1900
+ },
+ {
+ "epoch": 0.010060829321900104,
+ "grad_norm": 33772.24609375,
+ "learning_rate": 1.949e-05,
+ "loss": 0.9639,
+ "step": 1950
+ },
+ {
+ "epoch": 0.010318799304512927,
+ "grad_norm": 39775.046875,
+ "learning_rate": 1.999e-05,
+ "loss": 0.9386,
+ "step": 2000
+ },
+ {
+ "epoch": 0.01057676928712575,
+ "grad_norm": 38412.2109375,
+ "learning_rate": 2.0490000000000002e-05,
+ "loss": 0.9212,
+ "step": 2050
+ },
+ {
+ "epoch": 0.010834739269738573,
+ "grad_norm": 39548.98046875,
+ "learning_rate": 2.099e-05,
+ "loss": 0.9112,
+ "step": 2100
+ },
+ {
+ "epoch": 0.011092709252351396,
+ "grad_norm": 38127.77734375,
+ "learning_rate": 2.1490000000000003e-05,
+ "loss": 0.8866,
+ "step": 2150
+ },
+ {
+ "epoch": 0.011350679234964219,
+ "grad_norm": 39877.0390625,
+ "learning_rate": 2.199e-05,
+ "loss": 0.8806,
+ "step": 2200
+ },
+ {
+ "epoch": 0.011608649217577044,
+ "grad_norm": 34642.28515625,
+ "learning_rate": 2.249e-05,
+ "loss": 0.8645,
+ "step": 2250
+ },
+ {
+ "epoch": 0.011866619200189867,
+ "grad_norm": 38508.0078125,
+ "learning_rate": 2.2990000000000002e-05,
+ "loss": 0.8609,
+ "step": 2300
+ },
+ {
+ "epoch": 0.01212458918280269,
+ "grad_norm": 33287.765625,
+ "learning_rate": 2.349e-05,
+ "loss": 0.8443,
+ "step": 2350
+ },
+ {
+ "epoch": 0.012382559165415512,
+ "grad_norm": 35477.5546875,
+ "learning_rate": 2.3990000000000002e-05,
+ "loss": 0.839,
+ "step": 2400
+ },
+ {
+ "epoch": 0.012640529148028335,
+ "grad_norm": 32204.408203125,
+ "learning_rate": 2.449e-05,
+ "loss": 0.8204,
+ "step": 2450
+ },
+ {
+ "epoch": 0.012898499130641158,
+ "grad_norm": 35113.59765625,
+ "learning_rate": 2.4990000000000003e-05,
+ "loss": 0.8214,
+ "step": 2500
+ },
+ {
+ "epoch": 0.013156469113253981,
+ "grad_norm": 36591.2421875,
+ "learning_rate": 2.549e-05,
+ "loss": 0.8066,
+ "step": 2550
+ },
+ {
+ "epoch": 0.013414439095866804,
+ "grad_norm": 37926.3125,
+ "learning_rate": 2.5990000000000004e-05,
+ "loss": 0.7993,
+ "step": 2600
+ },
+ {
+ "epoch": 0.013672409078479627,
+ "grad_norm": 35413.01171875,
+ "learning_rate": 2.6490000000000002e-05,
+ "loss": 0.8012,
+ "step": 2650
+ },
+ {
+ "epoch": 0.013930379061092452,
+ "grad_norm": 33275.1796875,
+ "learning_rate": 2.6989999999999997e-05,
+ "loss": 0.7879,
+ "step": 2700
+ },
+ {
+ "epoch": 0.014188349043705275,
+ "grad_norm": 35463.87109375,
+ "learning_rate": 2.749e-05,
+ "loss": 0.7808,
+ "step": 2750
+ },
+ {
+ "epoch": 0.014446319026318098,
+ "grad_norm": 33143.234375,
+ "learning_rate": 2.7989999999999998e-05,
+ "loss": 0.7813,
+ "step": 2800
+ },
+ {
+ "epoch": 0.01470428900893092,
+ "grad_norm": 32908.71484375,
+ "learning_rate": 2.849e-05,
+ "loss": 0.7725,
+ "step": 2850
+ },
+ {
+ "epoch": 0.014962258991543744,
+ "grad_norm": 36443.578125,
+ "learning_rate": 2.8990000000000002e-05,
+ "loss": 0.761,
+ "step": 2900
+ },
+ {
+ "epoch": 0.015220228974156567,
+ "grad_norm": 32331.728515625,
+ "learning_rate": 2.949e-05,
+ "loss": 0.7588,
+ "step": 2950
+ },
+ {
+ "epoch": 0.01547819895676939,
+ "grad_norm": 33401.546875,
+ "learning_rate": 2.9990000000000003e-05,
+ "loss": 0.7462,
+ "step": 3000
+ },
+ {
+ "epoch": 0.015736168939382213,
+ "grad_norm": 32041.26171875,
+ "learning_rate": 3.049e-05,
+ "loss": 0.7449,
+ "step": 3050
+ },
+ {
+ "epoch": 0.015994138921995037,
+ "grad_norm": 32035.814453125,
+ "learning_rate": 3.099e-05,
+ "loss": 0.7373,
+ "step": 3100
+ },
+ {
+ "epoch": 0.01625210890460786,
+ "grad_norm": 31430.421875,
+ "learning_rate": 3.1490000000000005e-05,
+ "loss": 0.7371,
+ "step": 3150
+ },
+ {
+ "epoch": 0.016510078887220683,
+ "grad_norm": 30911.267578125,
+ "learning_rate": 3.1990000000000004e-05,
+ "loss": 0.7315,
+ "step": 3200
+ },
+ {
+ "epoch": 0.016768048869833505,
+ "grad_norm": 31906.193359375,
+ "learning_rate": 3.249e-05,
+ "loss": 0.7405,
+ "step": 3250
+ },
+ {
+ "epoch": 0.01702601885244633,
+ "grad_norm": 30320.1640625,
+ "learning_rate": 3.299e-05,
+ "loss": 0.7323,
+ "step": 3300
+ },
+ {
+ "epoch": 0.017283988835059154,
+ "grad_norm": 32357.072265625,
+ "learning_rate": 3.349e-05,
+ "loss": 0.7244,
+ "step": 3350
+ },
+ {
+ "epoch": 0.017541958817671975,
+ "grad_norm": 34023.2109375,
+ "learning_rate": 3.399e-05,
+ "loss": 0.7214,
+ "step": 3400
+ },
+ {
+ "epoch": 0.0177999288002848,
+ "grad_norm": 33940.8046875,
+ "learning_rate": 3.449e-05,
+ "loss": 0.7158,
+ "step": 3450
+ },
+ {
+ "epoch": 0.01805789878289762,
+ "grad_norm": 31701.14453125,
+ "learning_rate": 3.499e-05,
+ "loss": 0.7102,
+ "step": 3500
+ },
+ {
+ "epoch": 0.018315868765510446,
+ "grad_norm": 32291.861328125,
+ "learning_rate": 3.549e-05,
+ "loss": 0.7104,
+ "step": 3550
+ },
+ {
+ "epoch": 0.018573838748123267,
+ "grad_norm": 28074.177734375,
+ "learning_rate": 3.599e-05,
+ "loss": 0.7001,
+ "step": 3600
+ },
+ {
+ "epoch": 0.01883180873073609,
+ "grad_norm": 29823.787109375,
+ "learning_rate": 3.6490000000000005e-05,
+ "loss": 0.7029,
+ "step": 3650
+ },
+ {
+ "epoch": 0.019089778713348916,
+ "grad_norm": 29792.24609375,
+ "learning_rate": 3.699e-05,
+ "loss": 0.6949,
+ "step": 3700
+ },
+ {
+ "epoch": 0.019347748695961738,
+ "grad_norm": 31345.296875,
+ "learning_rate": 3.749e-05,
+ "loss": 0.6989,
+ "step": 3750
+ },
+ {
+ "epoch": 0.019605718678574562,
+ "grad_norm": 33923.0625,
+ "learning_rate": 3.799e-05,
+ "loss": 0.6984,
+ "step": 3800
+ },
+ {
+ "epoch": 0.019863688661187383,
+ "grad_norm": 30762.97265625,
+ "learning_rate": 3.8490000000000006e-05,
+ "loss": 0.6931,
+ "step": 3850
+ },
+ {
+ "epoch": 0.020121658643800208,
+ "grad_norm": 30794.13671875,
+ "learning_rate": 3.8990000000000004e-05,
+ "loss": 0.6923,
+ "step": 3900
+ },
+ {
+ "epoch": 0.02037962862641303,
+ "grad_norm": 29854.923828125,
+ "learning_rate": 3.9489999999999996e-05,
+ "loss": 0.6895,
+ "step": 3950
+ },
+ {
+ "epoch": 0.020637598609025854,
+ "grad_norm": 27336.958984375,
+ "learning_rate": 3.999e-05,
+ "loss": 0.6853,
+ "step": 4000
+ },
+ {
+ "epoch": 0.020895568591638675,
+ "grad_norm": 31836.81640625,
+ "learning_rate": 4.049e-05,
+ "loss": 0.6821,
+ "step": 4050
+ },
+ {
+ "epoch": 0.0211535385742515,
+ "grad_norm": 28508.548828125,
+ "learning_rate": 4.099e-05,
+ "loss": 0.6857,
+ "step": 4100
+ },
+ {
+ "epoch": 0.021411508556864325,
+ "grad_norm": 30309.2421875,
+ "learning_rate": 4.1490000000000004e-05,
+ "loss": 0.6791,
+ "step": 4150
+ },
+ {
+ "epoch": 0.021669478539477146,
+ "grad_norm": 31035.0703125,
+ "learning_rate": 4.199e-05,
+ "loss": 0.6762,
+ "step": 4200
+ },
+ {
+ "epoch": 0.02192744852208997,
+ "grad_norm": 30893.951171875,
+ "learning_rate": 4.249e-05,
+ "loss": 0.6739,
+ "step": 4250
+ },
+ {
+ "epoch": 0.022185418504702792,
+ "grad_norm": 28317.12890625,
+ "learning_rate": 4.299e-05,
+ "loss": 0.6635,
+ "step": 4300
+ },
+ {
+ "epoch": 0.022443388487315617,
+ "grad_norm": 27140.29296875,
+ "learning_rate": 4.3490000000000005e-05,
+ "loss": 0.6694,
+ "step": 4350
+ },
+ {
+ "epoch": 0.022701358469928438,
+ "grad_norm": 27948.32421875,
+ "learning_rate": 4.3990000000000004e-05,
+ "loss": 0.6667,
+ "step": 4400
+ },
+ {
+ "epoch": 0.022959328452541262,
+ "grad_norm": 27243.44140625,
+ "learning_rate": 4.449e-05,
+ "loss": 0.6689,
+ "step": 4450
+ },
+ {
+ "epoch": 0.023217298435154087,
+ "grad_norm": 29163.98828125,
+ "learning_rate": 4.499e-05,
+ "loss": 0.6639,
+ "step": 4500
+ },
+ {
+ "epoch": 0.02347526841776691,
+ "grad_norm": 27801.79296875,
+ "learning_rate": 4.549000000000001e-05,
+ "loss": 0.6612,
+ "step": 4550
+ },
+ {
+ "epoch": 0.023733238400379733,
+ "grad_norm": 28201.7265625,
+ "learning_rate": 4.599e-05,
+ "loss": 0.6608,
+ "step": 4600
+ },
+ {
+ "epoch": 0.023991208382992554,
+ "grad_norm": 28875.06640625,
+ "learning_rate": 4.649e-05,
+ "loss": 0.6642,
+ "step": 4650
+ },
+ {
+ "epoch": 0.02424917836560538,
+ "grad_norm": 25467.376953125,
+ "learning_rate": 4.699e-05,
+ "loss": 0.6513,
+ "step": 4700
+ },
+ {
+ "epoch": 0.0245071483482182,
+ "grad_norm": 27359.97265625,
+ "learning_rate": 4.749e-05,
+ "loss": 0.6554,
+ "step": 4750
+ },
+ {
+ "epoch": 0.024765118330831025,
+ "grad_norm": 30614.15234375,
+ "learning_rate": 4.799e-05,
+ "loss": 0.6574,
+ "step": 4800
+ },
+ {
+ "epoch": 0.025023088313443846,
+ "grad_norm": 29069.677734375,
+ "learning_rate": 4.8490000000000005e-05,
+ "loss": 0.6562,
+ "step": 4850
+ },
+ {
+ "epoch": 0.02528105829605667,
+ "grad_norm": 27337.37109375,
+ "learning_rate": 4.8990000000000004e-05,
+ "loss": 0.6507,
+ "step": 4900
+ },
+ {
+ "epoch": 0.025539028278669496,
+ "grad_norm": 26784.7265625,
+ "learning_rate": 4.949e-05,
+ "loss": 0.64,
+ "step": 4950
+ },
+ {
+ "epoch": 0.025796998261282317,
+ "grad_norm": 27480.509765625,
+ "learning_rate": 4.999e-05,
+ "loss": 0.6515,
+ "step": 5000
+ },
+ {
+ "epoch": 0.025796998261282317,
+ "eval_loss": 0.6312834024429321,
+ "eval_runtime": 3280.995,
+ "eval_samples_per_second": 945.177,
+ "eval_steps_per_second": 1.846,
+ "step": 5000
+ },
+ {
+ "epoch": 0.02605496824389514,
+ "grad_norm": 27871.740234375,
+ "learning_rate": 5.0490000000000006e-05,
+ "loss": 0.6424,
+ "step": 5050
+ },
+ {
+ "epoch": 0.026312938226507963,
+ "grad_norm": 31187.00390625,
+ "learning_rate": 5.0990000000000005e-05,
+ "loss": 0.643,
+ "step": 5100
+ },
+ {
+ "epoch": 0.026570908209120787,
+ "grad_norm": 25956.521484375,
+ "learning_rate": 5.149e-05,
+ "loss": 0.65,
+ "step": 5150
+ },
+ {
+ "epoch": 0.02682887819173361,
+ "grad_norm": 25967.70703125,
+ "learning_rate": 5.199000000000001e-05,
+ "loss": 0.6466,
+ "step": 5200
+ },
+ {
+ "epoch": 0.027086848174346433,
+ "grad_norm": 25310.275390625,
+ "learning_rate": 5.249000000000001e-05,
+ "loss": 0.6429,
+ "step": 5250
+ },
+ {
+ "epoch": 0.027344818156959255,
+ "grad_norm": 24740.033203125,
+ "learning_rate": 5.2990000000000006e-05,
+ "loss": 0.6415,
+ "step": 5300
+ },
+ {
+ "epoch": 0.02760278813957208,
+ "grad_norm": 30795.58984375,
+ "learning_rate": 5.3490000000000005e-05,
+ "loss": 0.6424,
+ "step": 5350
+ },
+ {
+ "epoch": 0.027860758122184904,
+ "grad_norm": 30625.59375,
+ "learning_rate": 5.399000000000001e-05,
+ "loss": 0.6361,
+ "step": 5400
+ },
+ {
+ "epoch": 0.028118728104797725,
+ "grad_norm": 27036.14453125,
+ "learning_rate": 5.449000000000001e-05,
+ "loss": 0.6351,
+ "step": 5450
+ },
+ {
+ "epoch": 0.02837669808741055,
+ "grad_norm": 26934.447265625,
+ "learning_rate": 5.499000000000001e-05,
+ "loss": 0.6304,
+ "step": 5500
+ },
+ {
+ "epoch": 0.02863466807002337,
+ "grad_norm": 25540.291015625,
+ "learning_rate": 5.549e-05,
+ "loss": 0.6304,
+ "step": 5550
+ },
+ {
+ "epoch": 0.028892638052636196,
+ "grad_norm": 26574.9375,
+ "learning_rate": 5.599e-05,
+ "loss": 0.6444,
+ "step": 5600
+ },
+ {
+ "epoch": 0.029150608035249017,
+ "grad_norm": 26941.955078125,
+ "learning_rate": 5.6489999999999996e-05,
+ "loss": 0.6373,
+ "step": 5650
+ },
+ {
+ "epoch": 0.02940857801786184,
+ "grad_norm": 26957.7734375,
+ "learning_rate": 5.699e-05,
+ "loss": 0.6363,
+ "step": 5700
+ },
+ {
+ "epoch": 0.029666548000474666,
+ "grad_norm": 24377.55859375,
+ "learning_rate": 5.749e-05,
+ "loss": 0.6213,
+ "step": 5750
+ },
+ {
+ "epoch": 0.029924517983087488,
+ "grad_norm": 25600.697265625,
+ "learning_rate": 5.799e-05,
+ "loss": 0.6362,
+ "step": 5800
+ },
+ {
+ "epoch": 0.030182487965700312,
+ "grad_norm": 23841.47265625,
+ "learning_rate": 5.849e-05,
+ "loss": 0.6274,
+ "step": 5850
+ },
+ {
+ "epoch": 0.030440457948313134,
+ "grad_norm": 23847.73046875,
+ "learning_rate": 5.899e-05,
+ "loss": 0.624,
+ "step": 5900
+ },
+ {
+ "epoch": 0.030698427930925958,
+ "grad_norm": 25549.033203125,
+ "learning_rate": 5.949e-05,
+ "loss": 0.627,
+ "step": 5950
+ },
+ {
+ "epoch": 0.03095639791353878,
+ "grad_norm": 25286.8046875,
+ "learning_rate": 5.999e-05,
+ "loss": 0.6272,
+ "step": 6000
+ },
+ {
+ "epoch": 0.031214367896151604,
+ "grad_norm": 25137.384765625,
+ "learning_rate": 6.0490000000000005e-05,
+ "loss": 0.622,
+ "step": 6050
+ },
+ {
+ "epoch": 0.031472337878764425,
+ "grad_norm": 23606.23828125,
+ "learning_rate": 6.0990000000000004e-05,
+ "loss": 0.6262,
+ "step": 6100
+ },
+ {
+ "epoch": 0.031730307861377254,
+ "grad_norm": 32101.404296875,
+ "learning_rate": 6.149000000000001e-05,
+ "loss": 0.619,
+ "step": 6150
+ },
+ {
+ "epoch": 0.031988277843990075,
+ "grad_norm": 23683.73046875,
+ "learning_rate": 6.199000000000001e-05,
+ "loss": 0.6129,
+ "step": 6200
+ },
+ {
+ "epoch": 0.032246247826602896,
+ "grad_norm": 25243.49609375,
+ "learning_rate": 6.249e-05,
+ "loss": 0.6194,
+ "step": 6250
+ },
+ {
+ "epoch": 0.03250421780921572,
+ "grad_norm": 28690.10546875,
+ "learning_rate": 6.299e-05,
+ "loss": 0.6199,
+ "step": 6300
+ },
+ {
+ "epoch": 0.032762187791828545,
+ "grad_norm": 24198.47265625,
+ "learning_rate": 6.349e-05,
+ "loss": 0.6077,
+ "step": 6350
+ },
+ {
+ "epoch": 0.03302015777444137,
+ "grad_norm": 24742.998046875,
+ "learning_rate": 6.399e-05,
+ "loss": 0.6168,
+ "step": 6400
+ },
+ {
+ "epoch": 0.03327812775705419,
+ "grad_norm": 27489.93359375,
+ "learning_rate": 6.449e-05,
+ "loss": 0.6136,
+ "step": 6450
+ },
+ {
+ "epoch": 0.03353609773966701,
+ "grad_norm": 28733.7265625,
+ "learning_rate": 6.499000000000001e-05,
+ "loss": 0.6184,
+ "step": 6500
+ },
+ {
+ "epoch": 0.03379406772227984,
+ "grad_norm": 23810.544921875,
+ "learning_rate": 6.549000000000001e-05,
+ "loss": 0.6167,
+ "step": 6550
+ },
+ {
+ "epoch": 0.03405203770489266,
+ "grad_norm": 25503.98828125,
+ "learning_rate": 6.599000000000001e-05,
+ "loss": 0.6184,
+ "step": 6600
+ },
+ {
+ "epoch": 0.03431000768750548,
+ "grad_norm": 24550.26171875,
+ "learning_rate": 6.649000000000001e-05,
+ "loss": 0.6146,
+ "step": 6650
+ },
+ {
+ "epoch": 0.03456797767011831,
+ "grad_norm": 22774.71875,
+ "learning_rate": 6.699000000000001e-05,
+ "loss": 0.6132,
+ "step": 6700
+ },
+ {
+ "epoch": 0.03482594765273113,
+ "grad_norm": 23878.90625,
+ "learning_rate": 6.749e-05,
+ "loss": 0.6127,
+ "step": 6750
+ },
+ {
+ "epoch": 0.03508391763534395,
+ "grad_norm": 28744.9921875,
+ "learning_rate": 6.799e-05,
+ "loss": 0.6203,
+ "step": 6800
+ },
+ {
+ "epoch": 0.03534188761795677,
+ "grad_norm": 24239.826171875,
+ "learning_rate": 6.849e-05,
+ "loss": 0.6069,
+ "step": 6850
+ },
+ {
+ "epoch": 0.0355998576005696,
+ "grad_norm": 27030.513671875,
+ "learning_rate": 6.899e-05,
+ "loss": 0.614,
+ "step": 6900
+ },
+ {
+ "epoch": 0.03585782758318242,
+ "grad_norm": 22872.59375,
+ "learning_rate": 6.949e-05,
+ "loss": 0.6068,
+ "step": 6950
+ },
+ {
+ "epoch": 0.03611579756579524,
+ "grad_norm": 23280.333984375,
+ "learning_rate": 6.999e-05,
+ "loss": 0.6064,
+ "step": 7000
+ },
+ {
+ "epoch": 0.03637376754840807,
+ "grad_norm": 24819.060546875,
+ "learning_rate": 7.049e-05,
+ "loss": 0.606,
+ "step": 7050
+ },
+ {
+ "epoch": 0.03663173753102089,
+ "grad_norm": 23739.595703125,
+ "learning_rate": 7.099e-05,
+ "loss": 0.6065,
+ "step": 7100
+ },
+ {
+ "epoch": 0.03688970751363371,
+ "grad_norm": 24261.28515625,
+ "learning_rate": 7.149e-05,
+ "loss": 0.6037,
+ "step": 7150
+ },
+ {
+ "epoch": 0.037147677496246534,
+ "grad_norm": 24133.744140625,
+ "learning_rate": 7.199000000000001e-05,
+ "loss": 0.6097,
+ "step": 7200
+ },
+ {
+ "epoch": 0.03740564747885936,
+ "grad_norm": 22903.197265625,
+ "learning_rate": 7.249e-05,
+ "loss": 0.6048,
+ "step": 7250
+ },
+ {
+ "epoch": 0.03766361746147218,
+ "grad_norm": 23503.970703125,
+ "learning_rate": 7.299e-05,
+ "loss": 0.6039,
+ "step": 7300
+ },
+ {
+ "epoch": 0.037921587444085005,
+ "grad_norm": 20935.388671875,
+ "learning_rate": 7.349e-05,
+ "loss": 0.6016,
+ "step": 7350
+ },
+ {
+ "epoch": 0.03817955742669783,
+ "grad_norm": 22991.720703125,
+ "learning_rate": 7.399e-05,
+ "loss": 0.6111,
+ "step": 7400
+ },
+ {
+ "epoch": 0.038437527409310654,
+ "grad_norm": 21915.90234375,
+ "learning_rate": 7.449e-05,
+ "loss": 0.5969,
+ "step": 7450
+ },
+ {
+ "epoch": 0.038695497391923475,
+ "grad_norm": 22474.25390625,
+ "learning_rate": 7.499e-05,
+ "loss": 0.6068,
+ "step": 7500
+ },
+ {
+ "epoch": 0.038953467374536296,
+ "grad_norm": 24122.150390625,
+ "learning_rate": 7.549000000000001e-05,
+ "loss": 0.6037,
+ "step": 7550
+ },
+ {
+ "epoch": 0.039211437357149125,
+ "grad_norm": 22262.220703125,
+ "learning_rate": 7.599000000000001e-05,
+ "loss": 0.5946,
+ "step": 7600
+ },
+ {
+ "epoch": 0.039469407339761946,
+ "grad_norm": 23959.7265625,
+ "learning_rate": 7.649000000000001e-05,
+ "loss": 0.598,
+ "step": 7650
+ },
+ {
+ "epoch": 0.03972737732237477,
+ "grad_norm": 21918.5859375,
+ "learning_rate": 7.699e-05,
+ "loss": 0.5959,
+ "step": 7700
+ },
+ {
+ "epoch": 0.03998534730498759,
+ "grad_norm": 23740.5390625,
+ "learning_rate": 7.749e-05,
+ "loss": 0.594,
+ "step": 7750
+ },
+ {
+ "epoch": 0.040243317287600416,
+ "grad_norm": 23406.4296875,
+ "learning_rate": 7.799e-05,
+ "loss": 0.6048,
+ "step": 7800
+ },
+ {
+ "epoch": 0.04050128727021324,
+ "grad_norm": 23423.201171875,
+ "learning_rate": 7.849e-05,
+ "loss": 0.5944,
+ "step": 7850
+ },
+ {
+ "epoch": 0.04075925725282606,
+ "grad_norm": 23187.76171875,
+ "learning_rate": 7.899000000000001e-05,
+ "loss": 0.5944,
+ "step": 7900
+ },
+ {
+ "epoch": 0.04101722723543889,
+ "grad_norm": 25532.4375,
+ "learning_rate": 7.949000000000001e-05,
+ "loss": 0.5978,
+ "step": 7950
+ },
+ {
+ "epoch": 0.04127519721805171,
+ "grad_norm": 23045.28515625,
+ "learning_rate": 7.999000000000001e-05,
+ "loss": 0.5968,
+ "step": 8000
+ },
+ {
+ "epoch": 0.04153316720066453,
+ "grad_norm": 22853.826171875,
+ "learning_rate": 8.049e-05,
+ "loss": 0.5915,
+ "step": 8050
+ },
+ {
+ "epoch": 0.04179113718327735,
+ "grad_norm": 21853.658203125,
+ "learning_rate": 8.099e-05,
+ "loss": 0.5932,
+ "step": 8100
+ },
+ {
+ "epoch": 0.04204910716589018,
+ "grad_norm": 22395.74609375,
+ "learning_rate": 8.149e-05,
+ "loss": 0.5925,
+ "step": 8150
+ },
+ {
+ "epoch": 0.042307077148503,
+ "grad_norm": 23933.40625,
+ "learning_rate": 8.199e-05,
+ "loss": 0.5878,
+ "step": 8200
+ },
+ {
+ "epoch": 0.04256504713111582,
+ "grad_norm": 21773.087890625,
+ "learning_rate": 8.249e-05,
+ "loss": 0.5916,
+ "step": 8250
+ },
+ {
+ "epoch": 0.04282301711372865,
+ "grad_norm": 22665.11328125,
+ "learning_rate": 8.299e-05,
+ "loss": 0.5906,
+ "step": 8300
+ },
+ {
+ "epoch": 0.04308098709634147,
+ "grad_norm": 22157.091796875,
+ "learning_rate": 8.349e-05,
+ "loss": 0.5873,
+ "step": 8350
+ },
+ {
+ "epoch": 0.04333895707895429,
+ "grad_norm": 21506.8125,
+ "learning_rate": 8.399e-05,
+ "loss": 0.5927,
+ "step": 8400
+ },
+ {
+ "epoch": 0.04359692706156711,
+ "grad_norm": 22143.341796875,
+ "learning_rate": 8.449e-05,
+ "loss": 0.5828,
+ "step": 8450
+ },
+ {
+ "epoch": 0.04385489704417994,
+ "grad_norm": 23341.23828125,
+ "learning_rate": 8.499e-05,
+ "loss": 0.5885,
+ "step": 8500
+ },
+ {
+ "epoch": 0.04411286702679276,
+ "grad_norm": 21876.96484375,
+ "learning_rate": 8.549000000000001e-05,
+ "loss": 0.5913,
+ "step": 8550
+ },
+ {
+ "epoch": 0.044370837009405584,
+ "grad_norm": 22307.29296875,
+ "learning_rate": 8.599000000000001e-05,
+ "loss": 0.583,
+ "step": 8600
+ },
+ {
+ "epoch": 0.04462880699201841,
+ "grad_norm": 22859.017578125,
+ "learning_rate": 8.649000000000001e-05,
+ "loss": 0.5889,
+ "step": 8650
+ },
+ {
+ "epoch": 0.04488677697463123,
+ "grad_norm": 22058.24609375,
+ "learning_rate": 8.699e-05,
+ "loss": 0.5848,
+ "step": 8700
+ },
+ {
+ "epoch": 0.045144746957244054,
+ "grad_norm": 22116.837890625,
+ "learning_rate": 8.749e-05,
+ "loss": 0.5858,
+ "step": 8750
+ },
+ {
+ "epoch": 0.045402716939856876,
+ "grad_norm": 23110.17578125,
+ "learning_rate": 8.799e-05,
+ "loss": 0.5855,
+ "step": 8800
+ },
+ {
+ "epoch": 0.045660686922469704,
+ "grad_norm": 24173.064453125,
+ "learning_rate": 8.849e-05,
+ "loss": 0.5878,
+ "step": 8850
+ },
+ {
+ "epoch": 0.045918656905082525,
+ "grad_norm": 21521.48046875,
+ "learning_rate": 8.899e-05,
+ "loss": 0.5914,
+ "step": 8900
+ },
+ {
+ "epoch": 0.046176626887695346,
+ "grad_norm": 24516.0,
+ "learning_rate": 8.949000000000001e-05,
+ "loss": 0.5849,
+ "step": 8950
+ },
+ {
+ "epoch": 0.046434596870308174,
+ "grad_norm": 22074.9609375,
+ "learning_rate": 8.999000000000001e-05,
+ "loss": 0.5848,
+ "step": 9000
+ },
+ {
+ "epoch": 0.046692566852920996,
+ "grad_norm": 21495.4140625,
+ "learning_rate": 9.049000000000001e-05,
+ "loss": 0.579,
+ "step": 9050
+ },
+ {
+ "epoch": 0.04695053683553382,
+ "grad_norm": 23548.224609375,
+ "learning_rate": 9.099000000000001e-05,
+ "loss": 0.5826,
+ "step": 9100
+ },
+ {
+ "epoch": 0.04720850681814664,
+ "grad_norm": 22144.51953125,
+ "learning_rate": 9.149e-05,
+ "loss": 0.5879,
+ "step": 9150
+ },
+ {
+ "epoch": 0.047466476800759466,
+ "grad_norm": 20656.185546875,
+ "learning_rate": 9.199e-05,
+ "loss": 0.5806,
+ "step": 9200
+ },
+ {
+ "epoch": 0.04772444678337229,
+ "grad_norm": 21228.814453125,
+ "learning_rate": 9.249e-05,
+ "loss": 0.5858,
+ "step": 9250
+ },
+ {
+ "epoch": 0.04798241676598511,
+ "grad_norm": 20801.869140625,
+ "learning_rate": 9.299e-05,
+ "loss": 0.5816,
+ "step": 9300
+ },
+ {
+ "epoch": 0.04824038674859793,
+ "grad_norm": 24044.283203125,
+ "learning_rate": 9.349e-05,
+ "loss": 0.5811,
+ "step": 9350
+ },
+ {
+ "epoch": 0.04849835673121076,
+ "grad_norm": 22395.47265625,
+ "learning_rate": 9.399e-05,
+ "loss": 0.5782,
+ "step": 9400
+ },
+ {
+ "epoch": 0.04875632671382358,
+ "grad_norm": 22353.078125,
+ "learning_rate": 9.449e-05,
+ "loss": 0.5758,
+ "step": 9450
+ },
+ {
+ "epoch": 0.0490142966964364,
+ "grad_norm": 22520.72265625,
+ "learning_rate": 9.499e-05,
+ "loss": 0.5752,
+ "step": 9500
+ },
+ {
+ "epoch": 0.04927226667904923,
+ "grad_norm": 22016.951171875,
+ "learning_rate": 9.549e-05,
+ "loss": 0.5764,
+ "step": 9550
+ },
+ {
+ "epoch": 0.04953023666166205,
+ "grad_norm": 20046.615234375,
+ "learning_rate": 9.599000000000001e-05,
+ "loss": 0.5759,
+ "step": 9600
+ },
+ {
+ "epoch": 0.04978820664427487,
+ "grad_norm": 21346.029296875,
+ "learning_rate": 9.649e-05,
+ "loss": 0.5798,
+ "step": 9650
+ },
+ {
+ "epoch": 0.05004617662688769,
+ "grad_norm": 22449.796875,
+ "learning_rate": 9.699e-05,
+ "loss": 0.5829,
+ "step": 9700
+ },
+ {
+ "epoch": 0.05030414660950052,
+ "grad_norm": 20538.751953125,
+ "learning_rate": 9.749e-05,
+ "loss": 0.5809,
+ "step": 9750
+ },
+ {
+ "epoch": 0.05056211659211334,
+ "grad_norm": 21123.19921875,
+ "learning_rate": 9.799e-05,
+ "loss": 0.5726,
+ "step": 9800
+ },
+ {
+ "epoch": 0.05082008657472616,
+ "grad_norm": 20853.08203125,
+ "learning_rate": 9.849e-05,
+ "loss": 0.5726,
+ "step": 9850
+ },
+ {
+ "epoch": 0.05107805655733899,
+ "grad_norm": 22160.841796875,
+ "learning_rate": 9.899e-05,
+ "loss": 0.5783,
+ "step": 9900
+ },
+ {
+ "epoch": 0.05133602653995181,
+ "grad_norm": 19711.109375,
+ "learning_rate": 9.949000000000001e-05,
+ "loss": 0.5722,
+ "step": 9950
+ },
+ {
+ "epoch": 0.051593996522564634,
+ "grad_norm": 21442.310546875,
+ "learning_rate": 9.999000000000001e-05,
+ "loss": 0.5773,
+ "step": 10000
+ },
+ {
+ "epoch": 0.051593996522564634,
+ "eval_loss": 0.5661358833312988,
+ "eval_runtime": 3272.6524,
+ "eval_samples_per_second": 947.586,
+ "eval_steps_per_second": 1.851,
+ "step": 10000
+ },
+ {
+ "epoch": 0.051851966505177455,
+ "grad_norm": 21442.943359375,
+ "learning_rate": 9.999998718392692e-05,
+ "loss": 0.5727,
+ "step": 10050
+ },
+ {
+ "epoch": 0.05210993648779028,
+ "grad_norm": 21711.177734375,
+ "learning_rate": 9.999994768416664e-05,
+ "loss": 0.5707,
+ "step": 10100
+ },
+ {
+ "epoch": 0.052367906470403104,
+ "grad_norm": 21793.666015625,
+ "learning_rate": 9.999988149540251e-05,
+ "loss": 0.5727,
+ "step": 10150
+ },
+ {
+ "epoch": 0.052625876453015925,
+ "grad_norm": 18847.970703125,
+ "learning_rate": 9.999978861766983e-05,
+ "loss": 0.5726,
+ "step": 10200
+ },
+ {
+ "epoch": 0.052883846435628754,
+ "grad_norm": 22870.91796875,
+ "learning_rate": 9.999966905101816e-05,
+ "loss": 0.5751,
+ "step": 10250
+ },
+ {
+ "epoch": 0.053141816418241575,
+ "grad_norm": 23970.431640625,
+ "learning_rate": 9.999952279551135e-05,
+ "loss": 0.5745,
+ "step": 10300
+ },
+ {
+ "epoch": 0.053399786400854396,
+ "grad_norm": 19482.65625,
+ "learning_rate": 9.999934985122746e-05,
+ "loss": 0.5734,
+ "step": 10350
+ },
+ {
+ "epoch": 0.05365775638346722,
+ "grad_norm": 19720.65625,
+ "learning_rate": 9.999915021825879e-05,
+ "loss": 0.5697,
+ "step": 10400
+ },
+ {
+ "epoch": 0.053915726366080045,
+ "grad_norm": 21484.8203125,
+ "learning_rate": 9.99989238967119e-05,
+ "loss": 0.5678,
+ "step": 10450
+ },
+ {
+ "epoch": 0.05417369634869287,
+ "grad_norm": 20198.669921875,
+ "learning_rate": 9.999867088670762e-05,
+ "loss": 0.5731,
+ "step": 10500
+ },
+ {
+ "epoch": 0.05443166633130569,
+ "grad_norm": 19887.86328125,
+ "learning_rate": 9.999839118838099e-05,
+ "loss": 0.5711,
+ "step": 10550
+ },
+ {
+ "epoch": 0.05468963631391851,
+ "grad_norm": 21250.41796875,
+ "learning_rate": 9.999808480188131e-05,
+ "loss": 0.5653,
+ "step": 10600
+ },
+ {
+ "epoch": 0.05494760629653134,
+ "grad_norm": 21179.904296875,
+ "learning_rate": 9.999775172737211e-05,
+ "loss": 0.5666,
+ "step": 10650
+ },
+ {
+ "epoch": 0.05520557627914416,
+ "grad_norm": 21106.083984375,
+ "learning_rate": 9.999739196503119e-05,
+ "loss": 0.5656,
+ "step": 10700
+ },
+ {
+ "epoch": 0.05546354626175698,
+ "grad_norm": 19393.994140625,
+ "learning_rate": 9.999700551505057e-05,
+ "loss": 0.566,
+ "step": 10750
+ },
+ {
+ "epoch": 0.05572151624436981,
+ "grad_norm": 22788.060546875,
+ "learning_rate": 9.999659237763656e-05,
+ "loss": 0.5681,
+ "step": 10800
+ },
+ {
+ "epoch": 0.05597948622698263,
+ "grad_norm": 20106.75390625,
+ "learning_rate": 9.999615255300966e-05,
+ "loss": 0.5668,
+ "step": 10850
+ },
+ {
+ "epoch": 0.05623745620959545,
+ "grad_norm": 22390.466796875,
+ "learning_rate": 9.999568604140464e-05,
+ "loss": 0.5665,
+ "step": 10900
+ },
+ {
+ "epoch": 0.05649542619220827,
+ "grad_norm": 21145.044921875,
+ "learning_rate": 9.999519284307053e-05,
+ "loss": 0.5645,
+ "step": 10950
+ },
+ {
+ "epoch": 0.0567533961748211,
+ "grad_norm": 22501.64453125,
+ "learning_rate": 9.999467295827059e-05,
+ "loss": 0.5663,
+ "step": 11000
+ },
+ {
+ "epoch": 0.05701136615743392,
+ "grad_norm": 21079.431640625,
+ "learning_rate": 9.999412638728229e-05,
+ "loss": 0.5605,
+ "step": 11050
+ },
+ {
+ "epoch": 0.05726933614004674,
+ "grad_norm": 21501.4375,
+ "learning_rate": 9.999355313039742e-05,
+ "loss": 0.5643,
+ "step": 11100
+ },
+ {
+ "epoch": 0.05752730612265957,
+ "grad_norm": 22092.6328125,
+ "learning_rate": 9.999295318792194e-05,
+ "loss": 0.5602,
+ "step": 11150
+ },
+ {
+ "epoch": 0.05778527610527239,
+ "grad_norm": 19948.81640625,
+ "learning_rate": 9.999232656017613e-05,
+ "loss": 0.5649,
+ "step": 11200
+ },
+ {
+ "epoch": 0.05804324608788521,
+ "grad_norm": 20543.5859375,
+ "learning_rate": 9.999167324749443e-05,
+ "loss": 0.5598,
+ "step": 11250
+ },
+ {
+ "epoch": 0.058301216070498034,
+ "grad_norm": 20948.060546875,
+ "learning_rate": 9.99909932502256e-05,
+ "loss": 0.5631,
+ "step": 11300
+ },
+ {
+ "epoch": 0.05855918605311086,
+ "grad_norm": 20384.732421875,
+ "learning_rate": 9.999028656873257e-05,
+ "loss": 0.5592,
+ "step": 11350
+ },
+ {
+ "epoch": 0.05881715603572368,
+ "grad_norm": 20027.615234375,
+ "learning_rate": 9.99895532033926e-05,
+ "loss": 0.5658,
+ "step": 11400
+ },
+ {
+ "epoch": 0.059075126018336505,
+ "grad_norm": 20702.263671875,
+ "learning_rate": 9.99887931545971e-05,
+ "loss": 0.56,
+ "step": 11450
+ },
+ {
+ "epoch": 0.05933309600094933,
+ "grad_norm": 21589.52734375,
+ "learning_rate": 9.99880064227518e-05,
+ "loss": 0.5595,
+ "step": 11500
+ },
+ {
+ "epoch": 0.059591065983562154,
+ "grad_norm": 20375.181640625,
+ "learning_rate": 9.998719300827663e-05,
+ "loss": 0.5627,
+ "step": 11550
+ },
+ {
+ "epoch": 0.059849035966174975,
+ "grad_norm": 20207.677734375,
+ "learning_rate": 9.998635291160577e-05,
+ "loss": 0.5615,
+ "step": 11600
+ },
+ {
+ "epoch": 0.060107005948787796,
+ "grad_norm": 20898.291015625,
+ "learning_rate": 9.998548613318767e-05,
+ "loss": 0.5594,
+ "step": 11650
+ },
+ {
+ "epoch": 0.060364975931400625,
+ "grad_norm": 20133.822265625,
+ "learning_rate": 9.998459267348497e-05,
+ "loss": 0.5631,
+ "step": 11700
+ },
+ {
+ "epoch": 0.060622945914013446,
+ "grad_norm": 19021.533203125,
+ "learning_rate": 9.99836725329746e-05,
+ "loss": 0.5576,
+ "step": 11750
+ },
+ {
+ "epoch": 0.06088091589662627,
+ "grad_norm": 19088.32421875,
+ "learning_rate": 9.998272571214772e-05,
+ "loss": 0.5619,
+ "step": 11800
+ },
+ {
+ "epoch": 0.061138885879239095,
+ "grad_norm": 19742.841796875,
+ "learning_rate": 9.99817522115097e-05,
+ "loss": 0.5626,
+ "step": 11850
+ },
+ {
+ "epoch": 0.061396855861851916,
+ "grad_norm": 21584.271484375,
+ "learning_rate": 9.99807520315802e-05,
+ "loss": 0.555,
+ "step": 11900
+ },
+ {
+ "epoch": 0.06165482584446474,
+ "grad_norm": 19766.76953125,
+ "learning_rate": 9.997972517289309e-05,
+ "loss": 0.5584,
+ "step": 11950
+ },
+ {
+ "epoch": 0.06191279582707756,
+ "grad_norm": 19821.556640625,
+ "learning_rate": 9.997867163599646e-05,
+ "loss": 0.5623,
+ "step": 12000
+ },
+ {
+ "epoch": 0.06217076580969039,
+ "grad_norm": 19488.490234375,
+ "learning_rate": 9.997759142145271e-05,
+ "loss": 0.5591,
+ "step": 12050
+ },
+ {
+ "epoch": 0.06242873579230321,
+ "grad_norm": 20093.806640625,
+ "learning_rate": 9.997648452983842e-05,
+ "loss": 0.5597,
+ "step": 12100
+ },
+ {
+ "epoch": 0.06268670577491603,
+ "grad_norm": 20202.154296875,
+ "learning_rate": 9.997535096174441e-05,
+ "loss": 0.5542,
+ "step": 12150
+ },
+ {
+ "epoch": 0.06294467575752885,
+ "grad_norm": 19978.154296875,
+ "learning_rate": 9.99741907177758e-05,
+ "loss": 0.5629,
+ "step": 12200
+ },
+ {
+ "epoch": 0.06320264574014167,
+ "grad_norm": 19697.005859375,
+ "learning_rate": 9.997300379855186e-05,
+ "loss": 0.5571,
+ "step": 12250
+ },
+ {
+ "epoch": 0.06346061572275451,
+ "grad_norm": 20384.287109375,
+ "learning_rate": 9.997179020470618e-05,
+ "loss": 0.5526,
+ "step": 12300
+ },
+ {
+ "epoch": 0.06371858570536733,
+ "grad_norm": 18652.044921875,
+ "learning_rate": 9.997054993688651e-05,
+ "loss": 0.5531,
+ "step": 12350
+ },
+ {
+ "epoch": 0.06397655568798015,
+ "grad_norm": 20133.990234375,
+ "learning_rate": 9.996928299575493e-05,
+ "loss": 0.5561,
+ "step": 12400
+ },
+ {
+ "epoch": 0.06423452567059297,
+ "grad_norm": 20575.875,
+ "learning_rate": 9.996798938198766e-05,
+ "loss": 0.5559,
+ "step": 12450
+ },
+ {
+ "epoch": 0.06449249565320579,
+ "grad_norm": 19524.828125,
+ "learning_rate": 9.996666909627525e-05,
+ "loss": 0.5437,
+ "step": 12500
+ },
+ {
+ "epoch": 0.06475046563581861,
+ "grad_norm": 22106.927734375,
+ "learning_rate": 9.996532213932242e-05,
+ "loss": 0.5691,
+ "step": 12550
+ },
+ {
+ "epoch": 0.06500843561843143,
+ "grad_norm": 18443.4609375,
+ "learning_rate": 9.996394851184814e-05,
+ "loss": 0.553,
+ "step": 12600
+ },
+ {
+ "epoch": 0.06526640560104426,
+ "grad_norm": 21786.943359375,
+ "learning_rate": 9.996254821458565e-05,
+ "loss": 0.562,
+ "step": 12650
+ },
+ {
+ "epoch": 0.06552437558365709,
+ "grad_norm": 22699.578125,
+ "learning_rate": 9.996112124828241e-05,
+ "loss": 0.5526,
+ "step": 12700
+ },
+ {
+ "epoch": 0.06578234556626991,
+ "grad_norm": 18522.822265625,
+ "learning_rate": 9.995966761370006e-05,
+ "loss": 0.5525,
+ "step": 12750
+ },
+ {
+ "epoch": 0.06604031554888273,
+ "grad_norm": 19723.44140625,
+ "learning_rate": 9.995818731161458e-05,
+ "loss": 0.5555,
+ "step": 12800
+ },
+ {
+ "epoch": 0.06629828553149555,
+ "grad_norm": 20643.173828125,
+ "learning_rate": 9.995668034281606e-05,
+ "loss": 0.5506,
+ "step": 12850
+ },
+ {
+ "epoch": 0.06655625551410838,
+ "grad_norm": 19303.68359375,
+ "learning_rate": 9.995514670810896e-05,
+ "loss": 0.5599,
+ "step": 12900
+ },
+ {
+ "epoch": 0.0668142254967212,
+ "grad_norm": 19837.240234375,
+ "learning_rate": 9.995358640831187e-05,
+ "loss": 0.5514,
+ "step": 12950
+ },
+ {
+ "epoch": 0.06707219547933402,
+ "grad_norm": 19212.25390625,
+ "learning_rate": 9.995199944425764e-05,
+ "loss": 0.5542,
+ "step": 13000
+ },
+ {
+ "epoch": 0.06733016546194685,
+ "grad_norm": 19908.70703125,
+ "learning_rate": 9.995038581679337e-05,
+ "loss": 0.5421,
+ "step": 13050
+ },
+ {
+ "epoch": 0.06758813544455967,
+ "grad_norm": 18933.306640625,
+ "learning_rate": 9.994874552678038e-05,
+ "loss": 0.549,
+ "step": 13100
+ },
+ {
+ "epoch": 0.0678461054271725,
+ "grad_norm": 19313.990234375,
+ "learning_rate": 9.994707857509422e-05,
+ "loss": 0.5569,
+ "step": 13150
+ },
+ {
+ "epoch": 0.06810407540978532,
+ "grad_norm": 20800.984375,
+ "learning_rate": 9.99453849626247e-05,
+ "loss": 0.5518,
+ "step": 13200
+ },
+ {
+ "epoch": 0.06836204539239814,
+ "grad_norm": 18623.361328125,
+ "learning_rate": 9.994366469027583e-05,
+ "loss": 0.5549,
+ "step": 13250
+ },
+ {
+ "epoch": 0.06862001537501096,
+ "grad_norm": 19761.654296875,
+ "learning_rate": 9.994191775896584e-05,
+ "loss": 0.5467,
+ "step": 13300
+ },
+ {
+ "epoch": 0.06887798535762378,
+ "grad_norm": 20618.501953125,
+ "learning_rate": 9.994014416962723e-05,
+ "loss": 0.5554,
+ "step": 13350
+ },
+ {
+ "epoch": 0.06913595534023662,
+ "grad_norm": 19279.791015625,
+ "learning_rate": 9.993834392320668e-05,
+ "loss": 0.5567,
+ "step": 13400
+ },
+ {
+ "epoch": 0.06939392532284944,
+ "grad_norm": 18802.34375,
+ "learning_rate": 9.993651702066516e-05,
+ "loss": 0.5608,
+ "step": 13450
+ },
+ {
+ "epoch": 0.06965189530546226,
+ "grad_norm": 20132.15625,
+ "learning_rate": 9.993466346297779e-05,
+ "loss": 0.547,
+ "step": 13500
+ },
+ {
+ "epoch": 0.06990986528807508,
+ "grad_norm": 19165.26171875,
+ "learning_rate": 9.993278325113403e-05,
+ "loss": 0.5485,
+ "step": 13550
+ },
+ {
+ "epoch": 0.0701678352706879,
+ "grad_norm": 18493.01171875,
+ "learning_rate": 9.993087638613743e-05,
+ "loss": 0.5455,
+ "step": 13600
+ },
+ {
+ "epoch": 0.07042580525330072,
+ "grad_norm": 18225.78125,
+ "learning_rate": 9.992894286900589e-05,
+ "loss": 0.5499,
+ "step": 13650
+ },
+ {
+ "epoch": 0.07068377523591354,
+ "grad_norm": 20189.802734375,
+ "learning_rate": 9.992698270077146e-05,
+ "loss": 0.5468,
+ "step": 13700
+ },
+ {
+ "epoch": 0.07094174521852638,
+ "grad_norm": 20861.2734375,
+ "learning_rate": 9.992499588248043e-05,
+ "loss": 0.5588,
+ "step": 13750
+ },
+ {
+ "epoch": 0.0711997152011392,
+ "grad_norm": 19876.689453125,
+ "learning_rate": 9.992298241519335e-05,
+ "loss": 0.5486,
+ "step": 13800
+ },
+ {
+ "epoch": 0.07145768518375202,
+ "grad_norm": 18371.142578125,
+ "learning_rate": 9.992094229998497e-05,
+ "loss": 0.5475,
+ "step": 13850
+ },
+ {
+ "epoch": 0.07171565516636484,
+ "grad_norm": 18274.396484375,
+ "learning_rate": 9.991887553794423e-05,
+ "loss": 0.549,
+ "step": 13900
+ },
+ {
+ "epoch": 0.07197362514897766,
+ "grad_norm": 18204.947265625,
+ "learning_rate": 9.991678213017437e-05,
+ "loss": 0.5419,
+ "step": 13950
+ },
+ {
+ "epoch": 0.07223159513159048,
+ "grad_norm": 18634.162109375,
+ "learning_rate": 9.991466207779278e-05,
+ "loss": 0.5528,
+ "step": 14000
+ },
+ {
+ "epoch": 0.0724895651142033,
+ "grad_norm": 21840.685546875,
+ "learning_rate": 9.991251538193112e-05,
+ "loss": 0.5492,
+ "step": 14050
+ },
+ {
+ "epoch": 0.07274753509681614,
+ "grad_norm": 18888.935546875,
+ "learning_rate": 9.991034204373524e-05,
+ "loss": 0.5504,
+ "step": 14100
+ },
+ {
+ "epoch": 0.07300550507942896,
+ "grad_norm": 19353.263671875,
+ "learning_rate": 9.990814206436524e-05,
+ "loss": 0.5425,
+ "step": 14150
+ },
+ {
+ "epoch": 0.07326347506204178,
+ "grad_norm": 18891.79296875,
+ "learning_rate": 9.990591544499543e-05,
+ "loss": 0.551,
+ "step": 14200
+ },
+ {
+ "epoch": 0.0735214450446546,
+ "grad_norm": 17878.33203125,
+ "learning_rate": 9.99036621868143e-05,
+ "loss": 0.5403,
+ "step": 14250
+ },
+ {
+ "epoch": 0.07377941502726743,
+ "grad_norm": 18997.544921875,
+ "learning_rate": 9.990138229102465e-05,
+ "loss": 0.5458,
+ "step": 14300
+ },
+ {
+ "epoch": 0.07403738500988025,
+ "grad_norm": 22162.03125,
+ "learning_rate": 9.989907575884341e-05,
+ "loss": 0.5482,
+ "step": 14350
+ },
+ {
+ "epoch": 0.07429535499249307,
+ "grad_norm": 17026.828125,
+ "learning_rate": 9.989674259150177e-05,
+ "loss": 0.5487,
+ "step": 14400
+ },
+ {
+ "epoch": 0.0745533249751059,
+ "grad_norm": 18335.169921875,
+ "learning_rate": 9.989438279024513e-05,
+ "loss": 0.5459,
+ "step": 14450
+ },
+ {
+ "epoch": 0.07481129495771872,
+ "grad_norm": 19508.666015625,
+ "learning_rate": 9.989199635633309e-05,
+ "loss": 0.5456,
+ "step": 14500
+ },
+ {
+ "epoch": 0.07506926494033155,
+ "grad_norm": 20281.28515625,
+ "learning_rate": 9.98895832910395e-05,
+ "loss": 0.5455,
+ "step": 14550
+ },
+ {
+ "epoch": 0.07532723492294437,
+ "grad_norm": 20196.259765625,
+ "learning_rate": 9.98871435956524e-05,
+ "loss": 0.5474,
+ "step": 14600
+ },
+ {
+ "epoch": 0.07558520490555719,
+ "grad_norm": 18934.544921875,
+ "learning_rate": 9.988467727147409e-05,
+ "loss": 0.546,
+ "step": 14650
+ },
+ {
+ "epoch": 0.07584317488817001,
+ "grad_norm": 20257.126953125,
+ "learning_rate": 9.988218431982098e-05,
+ "loss": 0.5443,
+ "step": 14700
+ },
+ {
+ "epoch": 0.07610114487078283,
+ "grad_norm": 20330.86328125,
+ "learning_rate": 9.98796647420238e-05,
+ "loss": 0.5423,
+ "step": 14750
+ },
+ {
+ "epoch": 0.07635911485339567,
+ "grad_norm": 19077.765625,
+ "learning_rate": 9.987711853942745e-05,
+ "loss": 0.5446,
+ "step": 14800
+ },
+ {
+ "epoch": 0.07661708483600849,
+ "grad_norm": 20855.169921875,
+ "learning_rate": 9.987454571339103e-05,
+ "loss": 0.5427,
+ "step": 14850
+ },
+ {
+ "epoch": 0.07687505481862131,
+ "grad_norm": 20556.005859375,
+ "learning_rate": 9.987194626528788e-05,
+ "loss": 0.5417,
+ "step": 14900
+ },
+ {
+ "epoch": 0.07713302480123413,
+ "grad_norm": 19028.7421875,
+ "learning_rate": 9.986932019650553e-05,
+ "loss": 0.5412,
+ "step": 14950
+ },
+ {
+ "epoch": 0.07739099478384695,
+ "grad_norm": 18669.166015625,
+ "learning_rate": 9.986666750844572e-05,
+ "loss": 0.5404,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07739099478384695,
+ "eval_loss": 0.5350670218467712,
+ "eval_runtime": 3217.7876,
+ "eval_samples_per_second": 963.743,
+ "eval_steps_per_second": 1.882,
+ "step": 15000
+ },
+ {
+ "epoch": 0.07764896476645977,
+ "grad_norm": 19965.779296875,
+ "learning_rate": 9.98639882025244e-05,
+ "loss": 0.5439,
+ "step": 15050
+ },
+ {
+ "epoch": 0.07790693474907259,
+ "grad_norm": 18329.9921875,
+ "learning_rate": 9.986128228017173e-05,
+ "loss": 0.5425,
+ "step": 15100
+ },
+ {
+ "epoch": 0.07816490473168543,
+ "grad_norm": 20102.005859375,
+ "learning_rate": 9.985854974283211e-05,
+ "loss": 0.5444,
+ "step": 15150
+ },
+ {
+ "epoch": 0.07842287471429825,
+ "grad_norm": 19234.671875,
+ "learning_rate": 9.985579059196406e-05,
+ "loss": 0.5443,
+ "step": 15200
+ },
+ {
+ "epoch": 0.07868084469691107,
+ "grad_norm": 18324.298828125,
+ "learning_rate": 9.985300482904041e-05,
+ "loss": 0.5419,
+ "step": 15250
+ },
+ {
+ "epoch": 0.07893881467952389,
+ "grad_norm": 18766.2734375,
+ "learning_rate": 9.985019245554814e-05,
+ "loss": 0.5412,
+ "step": 15300
+ },
+ {
+ "epoch": 0.07919678466213671,
+ "grad_norm": 18805.765625,
+ "learning_rate": 9.984735347298841e-05,
+ "loss": 0.5443,
+ "step": 15350
+ },
+ {
+ "epoch": 0.07945475464474953,
+ "grad_norm": 17677.30078125,
+ "learning_rate": 9.984448788287665e-05,
+ "loss": 0.5421,
+ "step": 15400
+ },
+ {
+ "epoch": 0.07971272462736236,
+ "grad_norm": 19851.3515625,
+ "learning_rate": 9.984159568674243e-05,
+ "loss": 0.5426,
+ "step": 15450
+ },
+ {
+ "epoch": 0.07997069460997518,
+ "grad_norm": 18453.05859375,
+ "learning_rate": 9.983867688612956e-05,
+ "loss": 0.5445,
+ "step": 15500
+ },
+ {
+ "epoch": 0.08022866459258801,
+ "grad_norm": 17366.869140625,
+ "learning_rate": 9.983573148259603e-05,
+ "loss": 0.5451,
+ "step": 15550
+ },
+ {
+ "epoch": 0.08048663457520083,
+ "grad_norm": 18628.716796875,
+ "learning_rate": 9.983275947771407e-05,
+ "loss": 0.5373,
+ "step": 15600
+ },
+ {
+ "epoch": 0.08074460455781365,
+ "grad_norm": 19403.87890625,
+ "learning_rate": 9.982976087307003e-05,
+ "loss": 0.5489,
+ "step": 15650
+ },
+ {
+ "epoch": 0.08100257454042648,
+ "grad_norm": 18485.71875,
+ "learning_rate": 9.982673567026455e-05,
+ "loss": 0.538,
+ "step": 15700
+ },
+ {
+ "epoch": 0.0812605445230393,
+ "grad_norm": 19837.1796875,
+ "learning_rate": 9.982368387091241e-05,
+ "loss": 0.5356,
+ "step": 15750
+ },
+ {
+ "epoch": 0.08151851450565212,
+ "grad_norm": 19505.34375,
+ "learning_rate": 9.982060547664258e-05,
+ "loss": 0.5356,
+ "step": 15800
+ },
+ {
+ "epoch": 0.08177648448826494,
+ "grad_norm": 18645.48828125,
+ "learning_rate": 9.981750048909828e-05,
+ "loss": 0.5381,
+ "step": 15850
+ },
+ {
+ "epoch": 0.08203445447087777,
+ "grad_norm": 20191.73828125,
+ "learning_rate": 9.981436890993689e-05,
+ "loss": 0.535,
+ "step": 15900
+ },
+ {
+ "epoch": 0.0822924244534906,
+ "grad_norm": 18908.15625,
+ "learning_rate": 9.981121074082995e-05,
+ "loss": 0.5405,
+ "step": 15950
+ },
+ {
+ "epoch": 0.08255039443610342,
+ "grad_norm": 19517.73828125,
+ "learning_rate": 9.980802598346326e-05,
+ "loss": 0.5407,
+ "step": 16000
+ },
+ {
+ "epoch": 0.08280836441871624,
+ "grad_norm": 18368.16015625,
+ "learning_rate": 9.980481463953679e-05,
+ "loss": 0.5391,
+ "step": 16050
+ },
+ {
+ "epoch": 0.08306633440132906,
+ "grad_norm": 19727.35546875,
+ "learning_rate": 9.980157671076466e-05,
+ "loss": 0.537,
+ "step": 16100
+ },
+ {
+ "epoch": 0.08332430438394188,
+ "grad_norm": 20757.890625,
+ "learning_rate": 9.979831219887525e-05,
+ "loss": 0.5408,
+ "step": 16150
+ },
+ {
+ "epoch": 0.0835822743665547,
+ "grad_norm": 19334.708984375,
+ "learning_rate": 9.979502110561108e-05,
+ "loss": 0.5371,
+ "step": 16200
+ },
+ {
+ "epoch": 0.08384024434916754,
+ "grad_norm": 19338.498046875,
+ "learning_rate": 9.979170343272886e-05,
+ "loss": 0.531,
+ "step": 16250
+ },
+ {
+ "epoch": 0.08409821433178036,
+ "grad_norm": 18722.365234375,
+ "learning_rate": 9.978835918199949e-05,
+ "loss": 0.5398,
+ "step": 16300
+ },
+ {
+ "epoch": 0.08435618431439318,
+ "grad_norm": 18026.109375,
+ "learning_rate": 9.97849883552081e-05,
+ "loss": 0.5423,
+ "step": 16350
+ },
+ {
+ "epoch": 0.084614154297006,
+ "grad_norm": 19646.78125,
+ "learning_rate": 9.978159095415396e-05,
+ "loss": 0.5387,
+ "step": 16400
+ },
+ {
+ "epoch": 0.08487212427961882,
+ "grad_norm": 20091.552734375,
+ "learning_rate": 9.977816698065052e-05,
+ "loss": 0.5376,
+ "step": 16450
+ },
+ {
+ "epoch": 0.08513009426223164,
+ "grad_norm": 20539.73046875,
+ "learning_rate": 9.977471643652546e-05,
+ "loss": 0.5333,
+ "step": 16500
+ },
+ {
+ "epoch": 0.08538806424484446,
+ "grad_norm": 18306.24609375,
+ "learning_rate": 9.977123932362059e-05,
+ "loss": 0.5405,
+ "step": 16550
+ },
+ {
+ "epoch": 0.0856460342274573,
+ "grad_norm": 20133.513671875,
+ "learning_rate": 9.976773564379193e-05,
+ "loss": 0.541,
+ "step": 16600
+ },
+ {
+ "epoch": 0.08590400421007012,
+ "grad_norm": 19533.50390625,
+ "learning_rate": 9.976420539890969e-05,
+ "loss": 0.5333,
+ "step": 16650
+ },
+ {
+ "epoch": 0.08616197419268294,
+ "grad_norm": 19509.087890625,
+ "learning_rate": 9.976064859085822e-05,
+ "loss": 0.5347,
+ "step": 16700
+ },
+ {
+ "epoch": 0.08641994417529576,
+ "grad_norm": 19590.818359375,
+ "learning_rate": 9.97570652215361e-05,
+ "loss": 0.5377,
+ "step": 16750
+ },
+ {
+ "epoch": 0.08667791415790858,
+ "grad_norm": 19510.705078125,
+ "learning_rate": 9.975345529285605e-05,
+ "loss": 0.5367,
+ "step": 16800
+ },
+ {
+ "epoch": 0.0869358841405214,
+ "grad_norm": 20015.8046875,
+ "learning_rate": 9.974981880674499e-05,
+ "loss": 0.5386,
+ "step": 16850
+ },
+ {
+ "epoch": 0.08719385412313423,
+ "grad_norm": 18704.03125,
+ "learning_rate": 9.974615576514399e-05,
+ "loss": 0.5361,
+ "step": 16900
+ },
+ {
+ "epoch": 0.08745182410574706,
+ "grad_norm": 18257.869140625,
+ "learning_rate": 9.974246617000832e-05,
+ "loss": 0.5304,
+ "step": 16950
+ },
+ {
+ "epoch": 0.08770979408835988,
+ "grad_norm": 18150.517578125,
+ "learning_rate": 9.973875002330743e-05,
+ "loss": 0.5289,
+ "step": 17000
+ },
+ {
+ "epoch": 0.0879677640709727,
+ "grad_norm": 18326.041015625,
+ "learning_rate": 9.97350073270249e-05,
+ "loss": 0.5347,
+ "step": 17050
+ },
+ {
+ "epoch": 0.08822573405358553,
+ "grad_norm": 18199.224609375,
+ "learning_rate": 9.973123808315852e-05,
+ "loss": 0.5269,
+ "step": 17100
+ },
+ {
+ "epoch": 0.08848370403619835,
+ "grad_norm": 20351.447265625,
+ "learning_rate": 9.972744229372025e-05,
+ "loss": 0.5334,
+ "step": 17150
+ },
+ {
+ "epoch": 0.08874167401881117,
+ "grad_norm": 19200.703125,
+ "learning_rate": 9.97236199607362e-05,
+ "loss": 0.5316,
+ "step": 17200
+ },
+ {
+ "epoch": 0.08899964400142399,
+ "grad_norm": 18855.7890625,
+ "learning_rate": 9.971977108624664e-05,
+ "loss": 0.5342,
+ "step": 17250
+ },
+ {
+ "epoch": 0.08925761398403682,
+ "grad_norm": 18889.56640625,
+ "learning_rate": 9.971589567230606e-05,
+ "loss": 0.5361,
+ "step": 17300
+ },
+ {
+ "epoch": 0.08951558396664965,
+ "grad_norm": 18003.9921875,
+ "learning_rate": 9.971199372098304e-05,
+ "loss": 0.5353,
+ "step": 17350
+ },
+ {
+ "epoch": 0.08977355394926247,
+ "grad_norm": 19555.30078125,
+ "learning_rate": 9.970806523436041e-05,
+ "loss": 0.5306,
+ "step": 17400
+ },
+ {
+ "epoch": 0.09003152393187529,
+ "grad_norm": 19433.37890625,
+ "learning_rate": 9.97041102145351e-05,
+ "loss": 0.5341,
+ "step": 17450
+ },
+ {
+ "epoch": 0.09028949391448811,
+ "grad_norm": 19238.341796875,
+ "learning_rate": 9.97001286636182e-05,
+ "loss": 0.5372,
+ "step": 17500
+ },
+ {
+ "epoch": 0.09054746389710093,
+ "grad_norm": 18698.78125,
+ "learning_rate": 9.969612058373502e-05,
+ "loss": 0.5356,
+ "step": 17550
+ },
+ {
+ "epoch": 0.09080543387971375,
+ "grad_norm": 17953.580078125,
+ "learning_rate": 9.969208597702497e-05,
+ "loss": 0.529,
+ "step": 17600
+ },
+ {
+ "epoch": 0.09106340386232659,
+ "grad_norm": 17678.716796875,
+ "learning_rate": 9.968802484564168e-05,
+ "loss": 0.5329,
+ "step": 17650
+ },
+ {
+ "epoch": 0.09132137384493941,
+ "grad_norm": 20412.287109375,
+ "learning_rate": 9.968393719175286e-05,
+ "loss": 0.534,
+ "step": 17700
+ },
+ {
+ "epoch": 0.09157934382755223,
+ "grad_norm": 20080.16015625,
+ "learning_rate": 9.967982301754044e-05,
+ "loss": 0.5307,
+ "step": 17750
+ },
+ {
+ "epoch": 0.09183731381016505,
+ "grad_norm": 18570.314453125,
+ "learning_rate": 9.96756823252005e-05,
+ "loss": 0.526,
+ "step": 17800
+ },
+ {
+ "epoch": 0.09209528379277787,
+ "grad_norm": 18329.107421875,
+ "learning_rate": 9.967151511694324e-05,
+ "loss": 0.5273,
+ "step": 17850
+ },
+ {
+ "epoch": 0.09235325377539069,
+ "grad_norm": 19036.18359375,
+ "learning_rate": 9.966732139499304e-05,
+ "loss": 0.5275,
+ "step": 17900
+ },
+ {
+ "epoch": 0.09261122375800351,
+ "grad_norm": 18708.826171875,
+ "learning_rate": 9.966310116158844e-05,
+ "loss": 0.5313,
+ "step": 17950
+ },
+ {
+ "epoch": 0.09286919374061635,
+ "grad_norm": 18660.791015625,
+ "learning_rate": 9.96588544189821e-05,
+ "loss": 0.5303,
+ "step": 18000
+ },
+ {
+ "epoch": 0.09312716372322917,
+ "grad_norm": 19709.181640625,
+ "learning_rate": 9.965458116944086e-05,
+ "loss": 0.5347,
+ "step": 18050
+ },
+ {
+ "epoch": 0.09338513370584199,
+ "grad_norm": 19683.798828125,
+ "learning_rate": 9.96502814152457e-05,
+ "loss": 0.5359,
+ "step": 18100
+ },
+ {
+ "epoch": 0.09364310368845481,
+ "grad_norm": 19533.09765625,
+ "learning_rate": 9.964595515869175e-05,
+ "loss": 0.5263,
+ "step": 18150
+ },
+ {
+ "epoch": 0.09390107367106763,
+ "grad_norm": 20254.892578125,
+ "learning_rate": 9.964160240208826e-05,
+ "loss": 0.5307,
+ "step": 18200
+ },
+ {
+ "epoch": 0.09415904365368045,
+ "grad_norm": 21316.876953125,
+ "learning_rate": 9.963722314775868e-05,
+ "loss": 0.5316,
+ "step": 18250
+ },
+ {
+ "epoch": 0.09441701363629328,
+ "grad_norm": 20027.03515625,
+ "learning_rate": 9.963281739804054e-05,
+ "loss": 0.5274,
+ "step": 18300
+ },
+ {
+ "epoch": 0.0946749836189061,
+ "grad_norm": 18551.994140625,
+ "learning_rate": 9.962838515528554e-05,
+ "loss": 0.5339,
+ "step": 18350
+ },
+ {
+ "epoch": 0.09493295360151893,
+ "grad_norm": 17779.97265625,
+ "learning_rate": 9.962392642185956e-05,
+ "loss": 0.5301,
+ "step": 18400
+ },
+ {
+ "epoch": 0.09519092358413175,
+ "grad_norm": 20620.232421875,
+ "learning_rate": 9.961944120014256e-05,
+ "loss": 0.522,
+ "step": 18450
+ },
+ {
+ "epoch": 0.09544889356674457,
+ "grad_norm": 18669.73046875,
+ "learning_rate": 9.961492949252868e-05,
+ "loss": 0.5261,
+ "step": 18500
+ },
+ {
+ "epoch": 0.0957068635493574,
+ "grad_norm": 19528.4765625,
+ "learning_rate": 9.961039130142617e-05,
+ "loss": 0.5276,
+ "step": 18550
+ },
+ {
+ "epoch": 0.09596483353197022,
+ "grad_norm": 19643.099609375,
+ "learning_rate": 9.960582662925744e-05,
+ "loss": 0.5332,
+ "step": 18600
+ },
+ {
+ "epoch": 0.09622280351458304,
+ "grad_norm": 19024.4375,
+ "learning_rate": 9.960123547845901e-05,
+ "loss": 0.529,
+ "step": 18650
+ },
+ {
+ "epoch": 0.09648077349719586,
+ "grad_norm": 20228.248046875,
+ "learning_rate": 9.959661785148155e-05,
+ "loss": 0.5322,
+ "step": 18700
+ },
+ {
+ "epoch": 0.0967387434798087,
+ "grad_norm": 20120.126953125,
+ "learning_rate": 9.959197375078986e-05,
+ "loss": 0.5256,
+ "step": 18750
+ },
+ {
+ "epoch": 0.09699671346242152,
+ "grad_norm": 19894.423828125,
+ "learning_rate": 9.95873031788629e-05,
+ "loss": 0.5257,
+ "step": 18800
+ },
+ {
+ "epoch": 0.09725468344503434,
+ "grad_norm": 18450.8671875,
+ "learning_rate": 9.958260613819367e-05,
+ "loss": 0.5268,
+ "step": 18850
+ },
+ {
+ "epoch": 0.09751265342764716,
+ "grad_norm": 22775.53125,
+ "learning_rate": 9.95778826312894e-05,
+ "loss": 0.5293,
+ "step": 18900
+ },
+ {
+ "epoch": 0.09777062341025998,
+ "grad_norm": 17769.38671875,
+ "learning_rate": 9.95731326606714e-05,
+ "loss": 0.5281,
+ "step": 18950
+ },
+ {
+ "epoch": 0.0980285933928728,
+ "grad_norm": 20731.322265625,
+ "learning_rate": 9.956835622887514e-05,
+ "loss": 0.5327,
+ "step": 19000
+ },
+ {
+ "epoch": 0.09828656337548562,
+ "grad_norm": 20059.11328125,
+ "learning_rate": 9.956355333845014e-05,
+ "loss": 0.5279,
+ "step": 19050
+ },
+ {
+ "epoch": 0.09854453335809846,
+ "grad_norm": 17477.626953125,
+ "learning_rate": 9.955872399196012e-05,
+ "loss": 0.5257,
+ "step": 19100
+ },
+ {
+ "epoch": 0.09880250334071128,
+ "grad_norm": 20293.232421875,
+ "learning_rate": 9.955386819198287e-05,
+ "loss": 0.5258,
+ "step": 19150
+ },
+ {
+ "epoch": 0.0990604733233241,
+ "grad_norm": 19330.4140625,
+ "learning_rate": 9.954898594111035e-05,
+ "loss": 0.5231,
+ "step": 19200
+ },
+ {
+ "epoch": 0.09931844330593692,
+ "grad_norm": 19410.818359375,
+ "learning_rate": 9.954407724194858e-05,
+ "loss": 0.5286,
+ "step": 19250
+ },
+ {
+ "epoch": 0.09957641328854974,
+ "grad_norm": 18320.552734375,
+ "learning_rate": 9.953914209711775e-05,
+ "loss": 0.5287,
+ "step": 19300
+ },
+ {
+ "epoch": 0.09983438327116256,
+ "grad_norm": 17585.583984375,
+ "learning_rate": 9.953418050925213e-05,
+ "loss": 0.5265,
+ "step": 19350
+ },
+ {
+ "epoch": 0.10009235325377538,
+ "grad_norm": 20318.298828125,
+ "learning_rate": 9.952919248100012e-05,
+ "loss": 0.5292,
+ "step": 19400
+ },
+ {
+ "epoch": 0.10035032323638822,
+ "grad_norm": 20239.33984375,
+ "learning_rate": 9.952417801502426e-05,
+ "loss": 0.522,
+ "step": 19450
+ },
+ {
+ "epoch": 0.10060829321900104,
+ "grad_norm": 18922.158203125,
+ "learning_rate": 9.951913711400115e-05,
+ "loss": 0.5275,
+ "step": 19500
+ },
+ {
+ "epoch": 0.10086626320161386,
+ "grad_norm": 18332.673828125,
+ "learning_rate": 9.951406978062153e-05,
+ "loss": 0.5282,
+ "step": 19550
+ },
+ {
+ "epoch": 0.10112423318422668,
+ "grad_norm": 19321.662109375,
+ "learning_rate": 9.950897601759024e-05,
+ "loss": 0.5236,
+ "step": 19600
+ },
+ {
+ "epoch": 0.1013822031668395,
+ "grad_norm": 19050.42578125,
+ "learning_rate": 9.950385582762624e-05,
+ "loss": 0.5269,
+ "step": 19650
+ },
+ {
+ "epoch": 0.10164017314945233,
+ "grad_norm": 18592.8125,
+ "learning_rate": 9.949870921346259e-05,
+ "loss": 0.5294,
+ "step": 19700
+ },
+ {
+ "epoch": 0.10189814313206515,
+ "grad_norm": 17702.080078125,
+ "learning_rate": 9.949353617784644e-05,
+ "loss": 0.5321,
+ "step": 19750
+ },
+ {
+ "epoch": 0.10215611311467798,
+ "grad_norm": 18935.71875,
+ "learning_rate": 9.948833672353907e-05,
+ "loss": 0.5279,
+ "step": 19800
+ },
+ {
+ "epoch": 0.1024140830972908,
+ "grad_norm": 19814.96484375,
+ "learning_rate": 9.948311085331585e-05,
+ "loss": 0.5174,
+ "step": 19850
+ },
+ {
+ "epoch": 0.10267205307990362,
+ "grad_norm": 18945.4375,
+ "learning_rate": 9.947785856996623e-05,
+ "loss": 0.525,
+ "step": 19900
+ },
+ {
+ "epoch": 0.10293002306251645,
+ "grad_norm": 19162.28125,
+ "learning_rate": 9.947257987629379e-05,
+ "loss": 0.5268,
+ "step": 19950
+ },
+ {
+ "epoch": 0.10318799304512927,
+ "grad_norm": 18814.861328125,
+ "learning_rate": 9.94672747751162e-05,
+ "loss": 0.5191,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10318799304512927,
+ "eval_loss": 0.5160176157951355,
+ "eval_runtime": 3272.5369,
+ "eval_samples_per_second": 947.62,
+ "eval_steps_per_second": 1.851,
+ "step": 20000
+ },
+ {
+ "epoch": 0.10344596302774209,
+ "grad_norm": 19089.77734375,
+ "learning_rate": 9.94619432692652e-05,
+ "loss": 0.5254,
+ "step": 20050
+ },
+ {
+ "epoch": 0.10370393301035491,
+ "grad_norm": 19005.53125,
+ "learning_rate": 9.945658536158667e-05,
+ "loss": 0.525,
+ "step": 20100
+ },
+ {
+ "epoch": 0.10396190299296774,
+ "grad_norm": 20896.8125,
+ "learning_rate": 9.945120105494054e-05,
+ "loss": 0.5173,
+ "step": 20150
+ },
+ {
+ "epoch": 0.10421987297558057,
+ "grad_norm": 19254.22265625,
+ "learning_rate": 9.944579035220085e-05,
+ "loss": 0.5195,
+ "step": 20200
+ },
+ {
+ "epoch": 0.10447784295819339,
+ "grad_norm": 19317.572265625,
+ "learning_rate": 9.944035325625573e-05,
+ "loss": 0.5239,
+ "step": 20250
+ },
+ {
+ "epoch": 0.10473581294080621,
+ "grad_norm": 18661.330078125,
+ "learning_rate": 9.94348897700074e-05,
+ "loss": 0.5243,
+ "step": 20300
+ },
+ {
+ "epoch": 0.10499378292341903,
+ "grad_norm": 18914.298828125,
+ "learning_rate": 9.942939989637216e-05,
+ "loss": 0.5247,
+ "step": 20350
+ },
+ {
+ "epoch": 0.10525175290603185,
+ "grad_norm": 17788.77734375,
+ "learning_rate": 9.942388363828041e-05,
+ "loss": 0.5205,
+ "step": 20400
+ },
+ {
+ "epoch": 0.10550972288864467,
+ "grad_norm": 17314.578125,
+ "learning_rate": 9.941834099867659e-05,
+ "loss": 0.5182,
+ "step": 20450
+ },
+ {
+ "epoch": 0.10576769287125751,
+ "grad_norm": 18627.068359375,
+ "learning_rate": 9.941277198051931e-05,
+ "loss": 0.5208,
+ "step": 20500
+ },
+ {
+ "epoch": 0.10602566285387033,
+ "grad_norm": 18274.4609375,
+ "learning_rate": 9.940717658678113e-05,
+ "loss": 0.5244,
+ "step": 20550
+ },
+ {
+ "epoch": 0.10628363283648315,
+ "grad_norm": 18668.767578125,
+ "learning_rate": 9.940155482044884e-05,
+ "loss": 0.5237,
+ "step": 20600
+ },
+ {
+ "epoch": 0.10654160281909597,
+ "grad_norm": 17703.703125,
+ "learning_rate": 9.939590668452316e-05,
+ "loss": 0.5148,
+ "step": 20650
+ },
+ {
+ "epoch": 0.10679957280170879,
+ "grad_norm": 18372.7578125,
+ "learning_rate": 9.939023218201901e-05,
+ "loss": 0.522,
+ "step": 20700
+ },
+ {
+ "epoch": 0.10705754278432161,
+ "grad_norm": 18439.521484375,
+ "learning_rate": 9.93845313159653e-05,
+ "loss": 0.5177,
+ "step": 20750
+ },
+ {
+ "epoch": 0.10731551276693443,
+ "grad_norm": 18812.10546875,
+ "learning_rate": 9.937880408940504e-05,
+ "loss": 0.5161,
+ "step": 20800
+ },
+ {
+ "epoch": 0.10757348274954727,
+ "grad_norm": 19163.4296875,
+ "learning_rate": 9.937305050539534e-05,
+ "loss": 0.5175,
+ "step": 20850
+ },
+ {
+ "epoch": 0.10783145273216009,
+ "grad_norm": 19459.3984375,
+ "learning_rate": 9.936727056700732e-05,
+ "loss": 0.5257,
+ "step": 20900
+ },
+ {
+ "epoch": 0.10808942271477291,
+ "grad_norm": 20272.22265625,
+ "learning_rate": 9.93614642773262e-05,
+ "loss": 0.5244,
+ "step": 20950
+ },
+ {
+ "epoch": 0.10834739269738573,
+ "grad_norm": 19995.736328125,
+ "learning_rate": 9.93556316394513e-05,
+ "loss": 0.5179,
+ "step": 21000
+ },
+ {
+ "epoch": 0.10860536267999855,
+ "grad_norm": 20567.369140625,
+ "learning_rate": 9.934977265649594e-05,
+ "loss": 0.528,
+ "step": 21050
+ },
+ {
+ "epoch": 0.10886333266261138,
+ "grad_norm": 19328.57421875,
+ "learning_rate": 9.934388733158753e-05,
+ "loss": 0.5249,
+ "step": 21100
+ },
+ {
+ "epoch": 0.1091213026452242,
+ "grad_norm": 17305.19921875,
+ "learning_rate": 9.933797566786757e-05,
+ "loss": 0.5163,
+ "step": 21150
+ },
+ {
+ "epoch": 0.10937927262783702,
+ "grad_norm": 19983.99609375,
+ "learning_rate": 9.933203766849155e-05,
+ "loss": 0.5227,
+ "step": 21200
+ },
+ {
+ "epoch": 0.10963724261044985,
+ "grad_norm": 18918.16015625,
+ "learning_rate": 9.93260733366291e-05,
+ "loss": 0.521,
+ "step": 21250
+ },
+ {
+ "epoch": 0.10989521259306267,
+ "grad_norm": 19260.40625,
+ "learning_rate": 9.932008267546384e-05,
+ "loss": 0.5195,
+ "step": 21300
+ },
+ {
+ "epoch": 0.1101531825756755,
+ "grad_norm": 16713.015625,
+ "learning_rate": 9.931406568819348e-05,
+ "loss": 0.5187,
+ "step": 21350
+ },
+ {
+ "epoch": 0.11041115255828832,
+ "grad_norm": 19787.67578125,
+ "learning_rate": 9.930802237802976e-05,
+ "loss": 0.5152,
+ "step": 21400
+ },
+ {
+ "epoch": 0.11066912254090114,
+ "grad_norm": 20632.775390625,
+ "learning_rate": 9.93019527481985e-05,
+ "loss": 0.5158,
+ "step": 21450
+ },
+ {
+ "epoch": 0.11092709252351396,
+ "grad_norm": 18545.748046875,
+ "learning_rate": 9.929585680193951e-05,
+ "loss": 0.5161,
+ "step": 21500
+ },
+ {
+ "epoch": 0.11118506250612678,
+ "grad_norm": 18961.138671875,
+ "learning_rate": 9.928973454250674e-05,
+ "loss": 0.5192,
+ "step": 21550
+ },
+ {
+ "epoch": 0.11144303248873962,
+ "grad_norm": 18970.013671875,
+ "learning_rate": 9.928358597316812e-05,
+ "loss": 0.5211,
+ "step": 21600
+ },
+ {
+ "epoch": 0.11170100247135244,
+ "grad_norm": 20800.046875,
+ "learning_rate": 9.927741109720561e-05,
+ "loss": 0.5143,
+ "step": 21650
+ },
+ {
+ "epoch": 0.11195897245396526,
+ "grad_norm": 18738.564453125,
+ "learning_rate": 9.927120991791528e-05,
+ "loss": 0.5232,
+ "step": 21700
+ },
+ {
+ "epoch": 0.11221694243657808,
+ "grad_norm": 18495.798828125,
+ "learning_rate": 9.926498243860715e-05,
+ "loss": 0.5176,
+ "step": 21750
+ },
+ {
+ "epoch": 0.1124749124191909,
+ "grad_norm": 18129.375,
+ "learning_rate": 9.925872866260537e-05,
+ "loss": 0.5132,
+ "step": 21800
+ },
+ {
+ "epoch": 0.11273288240180372,
+ "grad_norm": 19332.751953125,
+ "learning_rate": 9.925244859324807e-05,
+ "loss": 0.5135,
+ "step": 21850
+ },
+ {
+ "epoch": 0.11299085238441654,
+ "grad_norm": 19395.544921875,
+ "learning_rate": 9.924614223388742e-05,
+ "loss": 0.5191,
+ "step": 21900
+ },
+ {
+ "epoch": 0.11324882236702938,
+ "grad_norm": 20292.890625,
+ "learning_rate": 9.923980958788964e-05,
+ "loss": 0.5212,
+ "step": 21950
+ },
+ {
+ "epoch": 0.1135067923496422,
+ "grad_norm": 20309.033203125,
+ "learning_rate": 9.923345065863498e-05,
+ "loss": 0.5134,
+ "step": 22000
+ },
+ {
+ "epoch": 0.11376476233225502,
+ "grad_norm": 17513.578125,
+ "learning_rate": 9.922706544951772e-05,
+ "loss": 0.5216,
+ "step": 22050
+ },
+ {
+ "epoch": 0.11402273231486784,
+ "grad_norm": 18886.10546875,
+ "learning_rate": 9.922065396394614e-05,
+ "loss": 0.5219,
+ "step": 22100
+ },
+ {
+ "epoch": 0.11428070229748066,
+ "grad_norm": 19656.1484375,
+ "learning_rate": 9.921421620534257e-05,
+ "loss": 0.5163,
+ "step": 22150
+ },
+ {
+ "epoch": 0.11453867228009348,
+ "grad_norm": 18463.068359375,
+ "learning_rate": 9.920775217714338e-05,
+ "loss": 0.5198,
+ "step": 22200
+ },
+ {
+ "epoch": 0.1147966422627063,
+ "grad_norm": 20666.400390625,
+ "learning_rate": 9.920126188279892e-05,
+ "loss": 0.5164,
+ "step": 22250
+ },
+ {
+ "epoch": 0.11505461224531914,
+ "grad_norm": 20401.681640625,
+ "learning_rate": 9.919474532577359e-05,
+ "loss": 0.5163,
+ "step": 22300
+ },
+ {
+ "epoch": 0.11531258222793196,
+ "grad_norm": 21289.541015625,
+ "learning_rate": 9.918820250954581e-05,
+ "loss": 0.5114,
+ "step": 22350
+ },
+ {
+ "epoch": 0.11557055221054478,
+ "grad_norm": 17559.50390625,
+ "learning_rate": 9.918163343760801e-05,
+ "loss": 0.5156,
+ "step": 22400
+ },
+ {
+ "epoch": 0.1158285221931576,
+ "grad_norm": 17041.087890625,
+ "learning_rate": 9.917503811346662e-05,
+ "loss": 0.5146,
+ "step": 22450
+ },
+ {
+ "epoch": 0.11608649217577043,
+ "grad_norm": 20508.087890625,
+ "learning_rate": 9.916841654064212e-05,
+ "loss": 0.5202,
+ "step": 22500
+ },
+ {
+ "epoch": 0.11634446215838325,
+ "grad_norm": 21307.646484375,
+ "learning_rate": 9.916176872266894e-05,
+ "loss": 0.5108,
+ "step": 22550
+ },
+ {
+ "epoch": 0.11660243214099607,
+ "grad_norm": 21765.580078125,
+ "learning_rate": 9.91550946630956e-05,
+ "loss": 0.5158,
+ "step": 22600
+ },
+ {
+ "epoch": 0.1168604021236089,
+ "grad_norm": 18173.646484375,
+ "learning_rate": 9.914839436548454e-05,
+ "loss": 0.5081,
+ "step": 22650
+ },
+ {
+ "epoch": 0.11711837210622172,
+ "grad_norm": 19044.880859375,
+ "learning_rate": 9.914166783341227e-05,
+ "loss": 0.5144,
+ "step": 22700
+ },
+ {
+ "epoch": 0.11737634208883455,
+ "grad_norm": 19291.37109375,
+ "learning_rate": 9.91349150704693e-05,
+ "loss": 0.5147,
+ "step": 22750
+ },
+ {
+ "epoch": 0.11763431207144737,
+ "grad_norm": 16757.376953125,
+ "learning_rate": 9.91281360802601e-05,
+ "loss": 0.5163,
+ "step": 22800
+ },
+ {
+ "epoch": 0.11789228205406019,
+ "grad_norm": 18870.287109375,
+ "learning_rate": 9.912133086640318e-05,
+ "loss": 0.512,
+ "step": 22850
+ },
+ {
+ "epoch": 0.11815025203667301,
+ "grad_norm": 20520.115234375,
+ "learning_rate": 9.911449943253102e-05,
+ "loss": 0.5175,
+ "step": 22900
+ },
+ {
+ "epoch": 0.11840822201928583,
+ "grad_norm": 20585.21484375,
+ "learning_rate": 9.910764178229011e-05,
+ "loss": 0.5114,
+ "step": 22950
+ },
+ {
+ "epoch": 0.11866619200189867,
+ "grad_norm": 18660.384765625,
+ "learning_rate": 9.910075791934092e-05,
+ "loss": 0.5115,
+ "step": 23000
+ },
+ {
+ "epoch": 0.11892416198451149,
+ "grad_norm": 19391.318359375,
+ "learning_rate": 9.909384784735794e-05,
+ "loss": 0.5198,
+ "step": 23050
+ },
+ {
+ "epoch": 0.11918213196712431,
+ "grad_norm": 18007.306640625,
+ "learning_rate": 9.908691157002962e-05,
+ "loss": 0.5125,
+ "step": 23100
+ },
+ {
+ "epoch": 0.11944010194973713,
+ "grad_norm": 20804.501953125,
+ "learning_rate": 9.907994909105842e-05,
+ "loss": 0.516,
+ "step": 23150
+ },
+ {
+ "epoch": 0.11969807193234995,
+ "grad_norm": 18307.63671875,
+ "learning_rate": 9.907296041416076e-05,
+ "loss": 0.5108,
+ "step": 23200
+ },
+ {
+ "epoch": 0.11995604191496277,
+ "grad_norm": 19694.552734375,
+ "learning_rate": 9.906594554306709e-05,
+ "loss": 0.5092,
+ "step": 23250
+ },
+ {
+ "epoch": 0.12021401189757559,
+ "grad_norm": 20234.0703125,
+ "learning_rate": 9.90589044815218e-05,
+ "loss": 0.515,
+ "step": 23300
+ },
+ {
+ "epoch": 0.12047198188018843,
+ "grad_norm": 18483.4296875,
+ "learning_rate": 9.905183723328327e-05,
+ "loss": 0.5127,
+ "step": 23350
+ },
+ {
+ "epoch": 0.12072995186280125,
+ "grad_norm": 17447.51953125,
+ "learning_rate": 9.904474380212384e-05,
+ "loss": 0.5107,
+ "step": 23400
+ },
+ {
+ "epoch": 0.12098792184541407,
+ "grad_norm": 18881.7109375,
+ "learning_rate": 9.903762419182986e-05,
+ "loss": 0.5177,
+ "step": 23450
+ },
+ {
+ "epoch": 0.12124589182802689,
+ "grad_norm": 17861.990234375,
+ "learning_rate": 9.903047840620168e-05,
+ "loss": 0.5128,
+ "step": 23500
+ },
+ {
+ "epoch": 0.12150386181063971,
+ "grad_norm": 19111.53515625,
+ "learning_rate": 9.902330644905351e-05,
+ "loss": 0.5134,
+ "step": 23550
+ },
+ {
+ "epoch": 0.12176183179325253,
+ "grad_norm": 18461.107421875,
+ "learning_rate": 9.901610832421366e-05,
+ "loss": 0.51,
+ "step": 23600
+ },
+ {
+ "epoch": 0.12201980177586536,
+ "grad_norm": 18103.701171875,
+ "learning_rate": 9.900888403552431e-05,
+ "loss": 0.5131,
+ "step": 23650
+ },
+ {
+ "epoch": 0.12227777175847819,
+ "grad_norm": 18334.755859375,
+ "learning_rate": 9.900163358684168e-05,
+ "loss": 0.511,
+ "step": 23700
+ },
+ {
+ "epoch": 0.12253574174109101,
+ "grad_norm": 17476.322265625,
+ "learning_rate": 9.89943569820359e-05,
+ "loss": 0.5151,
+ "step": 23750
+ },
+ {
+ "epoch": 0.12279371172370383,
+ "grad_norm": 18698.09765625,
+ "learning_rate": 9.898705422499107e-05,
+ "loss": 0.5146,
+ "step": 23800
+ },
+ {
+ "epoch": 0.12305168170631665,
+ "grad_norm": 18321.80859375,
+ "learning_rate": 9.897972531960528e-05,
+ "loss": 0.5109,
+ "step": 23850
+ },
+ {
+ "epoch": 0.12330965168892948,
+ "grad_norm": 18234.361328125,
+ "learning_rate": 9.897237026979056e-05,
+ "loss": 0.5115,
+ "step": 23900
+ },
+ {
+ "epoch": 0.1235676216715423,
+ "grad_norm": 19737.849609375,
+ "learning_rate": 9.896498907947287e-05,
+ "loss": 0.5155,
+ "step": 23950
+ },
+ {
+ "epoch": 0.12382559165415512,
+ "grad_norm": 19136.279296875,
+ "learning_rate": 9.895758175259218e-05,
+ "loss": 0.5162,
+ "step": 24000
+ },
+ {
+ "epoch": 0.12408356163676794,
+ "grad_norm": 18575.431640625,
+ "learning_rate": 9.895014829310235e-05,
+ "loss": 0.5141,
+ "step": 24050
+ },
+ {
+ "epoch": 0.12434153161938077,
+ "grad_norm": 17589.353515625,
+ "learning_rate": 9.894268870497121e-05,
+ "loss": 0.501,
+ "step": 24100
+ },
+ {
+ "epoch": 0.1245995016019936,
+ "grad_norm": 19781.830078125,
+ "learning_rate": 9.893520299218057e-05,
+ "loss": 0.5128,
+ "step": 24150
+ },
+ {
+ "epoch": 0.12485747158460642,
+ "grad_norm": 17501.150390625,
+ "learning_rate": 9.892769115872617e-05,
+ "loss": 0.5113,
+ "step": 24200
+ },
+ {
+ "epoch": 0.12511544156721924,
+ "grad_norm": 21107.34375,
+ "learning_rate": 9.892015320861762e-05,
+ "loss": 0.5041,
+ "step": 24250
+ },
+ {
+ "epoch": 0.12537341154983206,
+ "grad_norm": 17529.345703125,
+ "learning_rate": 9.89125891458786e-05,
+ "loss": 0.5093,
+ "step": 24300
+ },
+ {
+ "epoch": 0.12563138153244488,
+ "grad_norm": 18061.890625,
+ "learning_rate": 9.890499897454663e-05,
+ "loss": 0.5111,
+ "step": 24350
+ },
+ {
+ "epoch": 0.1258893515150577,
+ "grad_norm": 21213.177734375,
+ "learning_rate": 9.889738269867318e-05,
+ "loss": 0.5106,
+ "step": 24400
+ },
+ {
+ "epoch": 0.12614732149767052,
+ "grad_norm": 17838.625,
+ "learning_rate": 9.88897403223237e-05,
+ "loss": 0.5144,
+ "step": 24450
+ },
+ {
+ "epoch": 0.12640529148028334,
+ "grad_norm": 19047.787109375,
+ "learning_rate": 9.888207184957752e-05,
+ "loss": 0.5133,
+ "step": 24500
+ },
+ {
+ "epoch": 0.12666326146289617,
+ "grad_norm": 17355.26171875,
+ "learning_rate": 9.887437728452794e-05,
+ "loss": 0.5054,
+ "step": 24550
+ },
+ {
+ "epoch": 0.12692123144550901,
+ "grad_norm": 20496.369140625,
+ "learning_rate": 9.886665663128216e-05,
+ "loss": 0.51,
+ "step": 24600
+ },
+ {
+ "epoch": 0.12717920142812184,
+ "grad_norm": 19887.734375,
+ "learning_rate": 9.885890989396133e-05,
+ "loss": 0.5049,
+ "step": 24650
+ },
+ {
+ "epoch": 0.12743717141073466,
+ "grad_norm": 20027.69140625,
+ "learning_rate": 9.885113707670049e-05,
+ "loss": 0.5118,
+ "step": 24700
+ },
+ {
+ "epoch": 0.12769514139334748,
+ "grad_norm": 18888.92578125,
+ "learning_rate": 9.884333818364861e-05,
+ "loss": 0.5168,
+ "step": 24750
+ },
+ {
+ "epoch": 0.1279531113759603,
+ "grad_norm": 20906.673828125,
+ "learning_rate": 9.883551321896862e-05,
+ "loss": 0.5109,
+ "step": 24800
+ },
+ {
+ "epoch": 0.12821108135857312,
+ "grad_norm": 20228.833984375,
+ "learning_rate": 9.882766218683731e-05,
+ "loss": 0.5167,
+ "step": 24850
+ },
+ {
+ "epoch": 0.12846905134118594,
+ "grad_norm": 19832.4609375,
+ "learning_rate": 9.881978509144543e-05,
+ "loss": 0.5113,
+ "step": 24900
+ },
+ {
+ "epoch": 0.12872702132379876,
+ "grad_norm": 18049.193359375,
+ "learning_rate": 9.881188193699758e-05,
+ "loss": 0.5121,
+ "step": 24950
+ },
+ {
+ "epoch": 0.12898499130641158,
+ "grad_norm": 18765.033203125,
+ "learning_rate": 9.880395272771236e-05,
+ "loss": 0.5123,
+ "step": 25000
+ },
+ {
+ "epoch": 0.12898499130641158,
+ "eval_loss": 0.5013377666473389,
+ "eval_runtime": 3332.4061,
+ "eval_samples_per_second": 930.595,
+ "eval_steps_per_second": 1.818,
+ "step": 25000
+ },
+ {
+ "epoch": 0.1292429612890244,
+ "grad_norm": 18435.787109375,
+ "learning_rate": 9.879599746782221e-05,
+ "loss": 0.5096,
+ "step": 25050
+ },
+ {
+ "epoch": 0.12950093127163723,
+ "grad_norm": 18993.890625,
+ "learning_rate": 9.878801616157348e-05,
+ "loss": 0.5091,
+ "step": 25100
+ },
+ {
+ "epoch": 0.12975890125425005,
+ "grad_norm": 19766.783203125,
+ "learning_rate": 9.878000881322646e-05,
+ "loss": 0.5059,
+ "step": 25150
+ },
+ {
+ "epoch": 0.13001687123686287,
+ "grad_norm": 19316.537109375,
+ "learning_rate": 9.87719754270553e-05,
+ "loss": 0.5112,
+ "step": 25200
+ },
+ {
+ "epoch": 0.1302748412194757,
+ "grad_norm": 19288.64453125,
+ "learning_rate": 9.876391600734807e-05,
+ "loss": 0.5031,
+ "step": 25250
+ },
+ {
+ "epoch": 0.1305328112020885,
+ "grad_norm": 18962.7734375,
+ "learning_rate": 9.875583055840673e-05,
+ "loss": 0.5113,
+ "step": 25300
+ },
+ {
+ "epoch": 0.13079078118470136,
+ "grad_norm": 19399.21875,
+ "learning_rate": 9.874771908454714e-05,
+ "loss": 0.5177,
+ "step": 25350
+ },
+ {
+ "epoch": 0.13104875116731418,
+ "grad_norm": 20511.134765625,
+ "learning_rate": 9.873958159009904e-05,
+ "loss": 0.5049,
+ "step": 25400
+ },
+ {
+ "epoch": 0.131306721149927,
+ "grad_norm": 17669.00390625,
+ "learning_rate": 9.87314180794061e-05,
+ "loss": 0.5076,
+ "step": 25450
+ },
+ {
+ "epoch": 0.13156469113253982,
+ "grad_norm": 20254.75390625,
+ "learning_rate": 9.872322855682579e-05,
+ "loss": 0.5102,
+ "step": 25500
+ },
+ {
+ "epoch": 0.13182266111515265,
+ "grad_norm": 21859.880859375,
+ "learning_rate": 9.871501302672956e-05,
+ "loss": 0.5098,
+ "step": 25550
+ },
+ {
+ "epoch": 0.13208063109776547,
+ "grad_norm": 18794.90625,
+ "learning_rate": 9.870677149350268e-05,
+ "loss": 0.5078,
+ "step": 25600
+ },
+ {
+ "epoch": 0.1323386010803783,
+ "grad_norm": 19909.65625,
+ "learning_rate": 9.869850396154434e-05,
+ "loss": 0.5129,
+ "step": 25650
+ },
+ {
+ "epoch": 0.1325965710629911,
+ "grad_norm": 17887.99609375,
+ "learning_rate": 9.869021043526756e-05,
+ "loss": 0.508,
+ "step": 25700
+ },
+ {
+ "epoch": 0.13285454104560393,
+ "grad_norm": 17189.033203125,
+ "learning_rate": 9.868189091909929e-05,
+ "loss": 0.5114,
+ "step": 25750
+ },
+ {
+ "epoch": 0.13311251102821675,
+ "grad_norm": 21320.78125,
+ "learning_rate": 9.867354541748033e-05,
+ "loss": 0.5081,
+ "step": 25800
+ },
+ {
+ "epoch": 0.13337048101082957,
+ "grad_norm": 19035.33984375,
+ "learning_rate": 9.866517393486532e-05,
+ "loss": 0.5065,
+ "step": 25850
+ },
+ {
+ "epoch": 0.1336284509934424,
+ "grad_norm": 19038.876953125,
+ "learning_rate": 9.86567764757228e-05,
+ "loss": 0.5055,
+ "step": 25900
+ },
+ {
+ "epoch": 0.13388642097605521,
+ "grad_norm": 20425.6875,
+ "learning_rate": 9.86483530445352e-05,
+ "loss": 0.5091,
+ "step": 25950
+ },
+ {
+ "epoch": 0.13414439095866804,
+ "grad_norm": 19947.34765625,
+ "learning_rate": 9.863990364579876e-05,
+ "loss": 0.5062,
+ "step": 26000
+ },
+ {
+ "epoch": 0.13440236094128089,
+ "grad_norm": 18758.7890625,
+ "learning_rate": 9.863142828402361e-05,
+ "loss": 0.5099,
+ "step": 26050
+ },
+ {
+ "epoch": 0.1346603309238937,
+ "grad_norm": 18494.076171875,
+ "learning_rate": 9.862292696373372e-05,
+ "loss": 0.5043,
+ "step": 26100
+ },
+ {
+ "epoch": 0.13491830090650653,
+ "grad_norm": 19646.841796875,
+ "learning_rate": 9.861439968946696e-05,
+ "loss": 0.508,
+ "step": 26150
+ },
+ {
+ "epoch": 0.13517627088911935,
+ "grad_norm": 19356.009765625,
+ "learning_rate": 9.8605846465775e-05,
+ "loss": 0.5015,
+ "step": 26200
+ },
+ {
+ "epoch": 0.13543424087173217,
+ "grad_norm": 19243.1875,
+ "learning_rate": 9.859726729722341e-05,
+ "loss": 0.5086,
+ "step": 26250
+ },
+ {
+ "epoch": 0.135692210854345,
+ "grad_norm": 20116.43359375,
+ "learning_rate": 9.858866218839156e-05,
+ "loss": 0.5074,
+ "step": 26300
+ },
+ {
+ "epoch": 0.1359501808369578,
+ "grad_norm": 18592.1015625,
+ "learning_rate": 9.858003114387269e-05,
+ "loss": 0.5054,
+ "step": 26350
+ },
+ {
+ "epoch": 0.13620815081957063,
+ "grad_norm": 19552.505859375,
+ "learning_rate": 9.85713741682739e-05,
+ "loss": 0.5042,
+ "step": 26400
+ },
+ {
+ "epoch": 0.13646612080218345,
+ "grad_norm": 18818.142578125,
+ "learning_rate": 9.856269126621611e-05,
+ "loss": 0.5106,
+ "step": 26450
+ },
+ {
+ "epoch": 0.13672409078479628,
+ "grad_norm": 21973.685546875,
+ "learning_rate": 9.855398244233407e-05,
+ "loss": 0.5116,
+ "step": 26500
+ },
+ {
+ "epoch": 0.1369820607674091,
+ "grad_norm": 19296.7890625,
+ "learning_rate": 9.854524770127641e-05,
+ "loss": 0.5103,
+ "step": 26550
+ },
+ {
+ "epoch": 0.13724003075002192,
+ "grad_norm": 18975.22265625,
+ "learning_rate": 9.853648704770554e-05,
+ "loss": 0.5093,
+ "step": 26600
+ },
+ {
+ "epoch": 0.13749800073263474,
+ "grad_norm": 20003.19140625,
+ "learning_rate": 9.852770048629776e-05,
+ "loss": 0.5094,
+ "step": 26650
+ },
+ {
+ "epoch": 0.13775597071524756,
+ "grad_norm": 19885.341796875,
+ "learning_rate": 9.851888802174312e-05,
+ "loss": 0.502,
+ "step": 26700
+ },
+ {
+ "epoch": 0.1380139406978604,
+ "grad_norm": 18030.115234375,
+ "learning_rate": 9.851004965874557e-05,
+ "loss": 0.5045,
+ "step": 26750
+ },
+ {
+ "epoch": 0.13827191068047323,
+ "grad_norm": 19143.369140625,
+ "learning_rate": 9.850118540202286e-05,
+ "loss": 0.5068,
+ "step": 26800
+ },
+ {
+ "epoch": 0.13852988066308605,
+ "grad_norm": 18902.5390625,
+ "learning_rate": 9.849229525630656e-05,
+ "loss": 0.4984,
+ "step": 26850
+ },
+ {
+ "epoch": 0.13878785064569887,
+ "grad_norm": 18523.115234375,
+ "learning_rate": 9.848337922634206e-05,
+ "loss": 0.5099,
+ "step": 26900
+ },
+ {
+ "epoch": 0.1390458206283117,
+ "grad_norm": 19873.283203125,
+ "learning_rate": 9.847443731688852e-05,
+ "loss": 0.5039,
+ "step": 26950
+ },
+ {
+ "epoch": 0.13930379061092452,
+ "grad_norm": 20202.23046875,
+ "learning_rate": 9.846546953271902e-05,
+ "loss": 0.507,
+ "step": 27000
+ },
+ {
+ "epoch": 0.13956176059353734,
+ "grad_norm": 17484.572265625,
+ "learning_rate": 9.845647587862034e-05,
+ "loss": 0.5113,
+ "step": 27050
+ },
+ {
+ "epoch": 0.13981973057615016,
+ "grad_norm": 17931.634765625,
+ "learning_rate": 9.844745635939316e-05,
+ "loss": 0.5051,
+ "step": 27100
+ },
+ {
+ "epoch": 0.14007770055876298,
+ "grad_norm": 20536.693359375,
+ "learning_rate": 9.843841097985191e-05,
+ "loss": 0.5044,
+ "step": 27150
+ },
+ {
+ "epoch": 0.1403356705413758,
+ "grad_norm": 18379.619140625,
+ "learning_rate": 9.842933974482482e-05,
+ "loss": 0.5071,
+ "step": 27200
+ },
+ {
+ "epoch": 0.14059364052398862,
+ "grad_norm": 19097.240234375,
+ "learning_rate": 9.842024265915397e-05,
+ "loss": 0.5046,
+ "step": 27250
+ },
+ {
+ "epoch": 0.14085161050660144,
+ "grad_norm": 22569.80859375,
+ "learning_rate": 9.841111972769517e-05,
+ "loss": 0.5022,
+ "step": 27300
+ },
+ {
+ "epoch": 0.14110958048921426,
+ "grad_norm": 17499.166015625,
+ "learning_rate": 9.84019709553181e-05,
+ "loss": 0.5014,
+ "step": 27350
+ },
+ {
+ "epoch": 0.14136755047182709,
+ "grad_norm": 20447.25,
+ "learning_rate": 9.839279634690619e-05,
+ "loss": 0.5065,
+ "step": 27400
+ },
+ {
+ "epoch": 0.14162552045443993,
+ "grad_norm": 20977.70703125,
+ "learning_rate": 9.838359590735665e-05,
+ "loss": 0.5042,
+ "step": 27450
+ },
+ {
+ "epoch": 0.14188349043705276,
+ "grad_norm": 18168.962890625,
+ "learning_rate": 9.83743696415805e-05,
+ "loss": 0.5043,
+ "step": 27500
+ },
+ {
+ "epoch": 0.14214146041966558,
+ "grad_norm": 18671.841796875,
+ "learning_rate": 9.836511755450256e-05,
+ "loss": 0.5054,
+ "step": 27550
+ },
+ {
+ "epoch": 0.1423994304022784,
+ "grad_norm": 17737.90625,
+ "learning_rate": 9.835583965106141e-05,
+ "loss": 0.507,
+ "step": 27600
+ },
+ {
+ "epoch": 0.14265740038489122,
+ "grad_norm": 23218.873046875,
+ "learning_rate": 9.834653593620939e-05,
+ "loss": 0.5055,
+ "step": 27650
+ },
+ {
+ "epoch": 0.14291537036750404,
+ "grad_norm": 20013.341796875,
+ "learning_rate": 9.833720641491269e-05,
+ "loss": 0.5008,
+ "step": 27700
+ },
+ {
+ "epoch": 0.14317334035011686,
+ "grad_norm": 21755.08203125,
+ "learning_rate": 9.832785109215119e-05,
+ "loss": 0.5029,
+ "step": 27750
+ },
+ {
+ "epoch": 0.14343131033272968,
+ "grad_norm": 18450.541015625,
+ "learning_rate": 9.831846997291859e-05,
+ "loss": 0.5086,
+ "step": 27800
+ },
+ {
+ "epoch": 0.1436892803153425,
+ "grad_norm": 17578.990234375,
+ "learning_rate": 9.830906306222235e-05,
+ "loss": 0.498,
+ "step": 27850
+ },
+ {
+ "epoch": 0.14394725029795533,
+ "grad_norm": 18771.2578125,
+ "learning_rate": 9.82996303650837e-05,
+ "loss": 0.5006,
+ "step": 27900
+ },
+ {
+ "epoch": 0.14420522028056815,
+ "grad_norm": 19841.912109375,
+ "learning_rate": 9.829017188653763e-05,
+ "loss": 0.5003,
+ "step": 27950
+ },
+ {
+ "epoch": 0.14446319026318097,
+ "grad_norm": 19089.384765625,
+ "learning_rate": 9.82806876316329e-05,
+ "loss": 0.5028,
+ "step": 28000
+ },
+ {
+ "epoch": 0.1447211602457938,
+ "grad_norm": 17971.998046875,
+ "learning_rate": 9.827117760543198e-05,
+ "loss": 0.5103,
+ "step": 28050
+ },
+ {
+ "epoch": 0.1449791302284066,
+ "grad_norm": 19590.46875,
+ "learning_rate": 9.826164181301121e-05,
+ "loss": 0.5075,
+ "step": 28100
+ },
+ {
+ "epoch": 0.14523710021101943,
+ "grad_norm": 19316.150390625,
+ "learning_rate": 9.825208025946056e-05,
+ "loss": 0.4971,
+ "step": 28150
+ },
+ {
+ "epoch": 0.14549507019363228,
+ "grad_norm": 19814.3125,
+ "learning_rate": 9.82424929498838e-05,
+ "loss": 0.501,
+ "step": 28200
+ },
+ {
+ "epoch": 0.1457530401762451,
+ "grad_norm": 18669.203125,
+ "learning_rate": 9.823287988939847e-05,
+ "loss": 0.5027,
+ "step": 28250
+ },
+ {
+ "epoch": 0.14601101015885792,
+ "grad_norm": 20375.48828125,
+ "learning_rate": 9.822324108313585e-05,
+ "loss": 0.4966,
+ "step": 28300
+ },
+ {
+ "epoch": 0.14626898014147074,
+ "grad_norm": 19665.4296875,
+ "learning_rate": 9.82135765362409e-05,
+ "loss": 0.4966,
+ "step": 28350
+ },
+ {
+ "epoch": 0.14652695012408357,
+ "grad_norm": 19579.771484375,
+ "learning_rate": 9.820388625387242e-05,
+ "loss": 0.5028,
+ "step": 28400
+ },
+ {
+ "epoch": 0.1467849201066964,
+ "grad_norm": 20270.564453125,
+ "learning_rate": 9.819417024120285e-05,
+ "loss": 0.4972,
+ "step": 28450
+ },
+ {
+ "epoch": 0.1470428900893092,
+ "grad_norm": 20025.6328125,
+ "learning_rate": 9.818442850341845e-05,
+ "loss": 0.5082,
+ "step": 28500
+ },
+ {
+ "epoch": 0.14730086007192203,
+ "grad_norm": 19062.525390625,
+ "learning_rate": 9.817466104571915e-05,
+ "loss": 0.4983,
+ "step": 28550
+ },
+ {
+ "epoch": 0.14755883005453485,
+ "grad_norm": 18558.0390625,
+ "learning_rate": 9.816486787331862e-05,
+ "loss": 0.5004,
+ "step": 28600
+ },
+ {
+ "epoch": 0.14781680003714767,
+ "grad_norm": 20880.6875,
+ "learning_rate": 9.815504899144428e-05,
+ "loss": 0.5036,
+ "step": 28650
+ },
+ {
+ "epoch": 0.1480747700197605,
+ "grad_norm": 19120.3359375,
+ "learning_rate": 9.814520440533726e-05,
+ "loss": 0.5004,
+ "step": 28700
+ },
+ {
+ "epoch": 0.14833274000237331,
+ "grad_norm": 17185.451171875,
+ "learning_rate": 9.813533412025242e-05,
+ "loss": 0.5047,
+ "step": 28750
+ },
+ {
+ "epoch": 0.14859070998498614,
+ "grad_norm": 21795.697265625,
+ "learning_rate": 9.81254381414583e-05,
+ "loss": 0.5033,
+ "step": 28800
+ },
+ {
+ "epoch": 0.14884867996759896,
+ "grad_norm": 21923.44140625,
+ "learning_rate": 9.811551647423718e-05,
+ "loss": 0.4957,
+ "step": 28850
+ },
+ {
+ "epoch": 0.1491066499502118,
+ "grad_norm": 18988.30078125,
+ "learning_rate": 9.810556912388509e-05,
+ "loss": 0.4979,
+ "step": 28900
+ },
+ {
+ "epoch": 0.14936461993282463,
+ "grad_norm": 18479.74609375,
+ "learning_rate": 9.809559609571169e-05,
+ "loss": 0.5003,
+ "step": 28950
+ },
+ {
+ "epoch": 0.14962258991543745,
+ "grad_norm": 20426.57421875,
+ "learning_rate": 9.808559739504043e-05,
+ "loss": 0.5019,
+ "step": 29000
+ },
+ {
+ "epoch": 0.14988055989805027,
+ "grad_norm": 20044.365234375,
+ "learning_rate": 9.80755730272084e-05,
+ "loss": 0.5012,
+ "step": 29050
+ },
+ {
+ "epoch": 0.1501385298806631,
+ "grad_norm": 18321.439453125,
+ "learning_rate": 9.806552299756641e-05,
+ "loss": 0.4918,
+ "step": 29100
+ },
+ {
+ "epoch": 0.1503964998632759,
+ "grad_norm": 20315.681640625,
+ "learning_rate": 9.805544731147899e-05,
+ "loss": 0.5015,
+ "step": 29150
+ },
+ {
+ "epoch": 0.15065446984588873,
+ "grad_norm": 20399.990234375,
+ "learning_rate": 9.804534597432432e-05,
+ "loss": 0.4967,
+ "step": 29200
+ },
+ {
+ "epoch": 0.15091243982850155,
+ "grad_norm": 19539.91796875,
+ "learning_rate": 9.803521899149432e-05,
+ "loss": 0.5002,
+ "step": 29250
+ },
+ {
+ "epoch": 0.15117040981111438,
+ "grad_norm": 20317.970703125,
+ "learning_rate": 9.802506636839457e-05,
+ "loss": 0.4988,
+ "step": 29300
+ },
+ {
+ "epoch": 0.1514283797937272,
+ "grad_norm": 18728.32421875,
+ "learning_rate": 9.801488811044434e-05,
+ "loss": 0.5007,
+ "step": 29350
+ },
+ {
+ "epoch": 0.15168634977634002,
+ "grad_norm": 21256.51171875,
+ "learning_rate": 9.80046842230766e-05,
+ "loss": 0.5066,
+ "step": 29400
+ },
+ {
+ "epoch": 0.15194431975895284,
+ "grad_norm": 18871.8828125,
+ "learning_rate": 9.799445471173799e-05,
+ "loss": 0.502,
+ "step": 29450
+ },
+ {
+ "epoch": 0.15220228974156566,
+ "grad_norm": 18434.251953125,
+ "learning_rate": 9.798419958188878e-05,
+ "loss": 0.5018,
+ "step": 29500
+ },
+ {
+ "epoch": 0.15246025972417848,
+ "grad_norm": 18562.412109375,
+ "learning_rate": 9.7973918839003e-05,
+ "loss": 0.4978,
+ "step": 29550
+ },
+ {
+ "epoch": 0.15271822970679133,
+ "grad_norm": 20020.7890625,
+ "learning_rate": 9.796361248856832e-05,
+ "loss": 0.4989,
+ "step": 29600
+ },
+ {
+ "epoch": 0.15297619968940415,
+ "grad_norm": 20026.6015625,
+ "learning_rate": 9.795328053608606e-05,
+ "loss": 0.5002,
+ "step": 29650
+ },
+ {
+ "epoch": 0.15323416967201697,
+ "grad_norm": 20098.703125,
+ "learning_rate": 9.794292298707119e-05,
+ "loss": 0.4938,
+ "step": 29700
+ },
+ {
+ "epoch": 0.1534921396546298,
+ "grad_norm": 18960.154296875,
+ "learning_rate": 9.793253984705239e-05,
+ "loss": 0.4956,
+ "step": 29750
+ },
+ {
+ "epoch": 0.15375010963724262,
+ "grad_norm": 20478.669921875,
+ "learning_rate": 9.7922131121572e-05,
+ "loss": 0.4998,
+ "step": 29800
+ },
+ {
+ "epoch": 0.15400807961985544,
+ "grad_norm": 20406.701171875,
+ "learning_rate": 9.791169681618596e-05,
+ "loss": 0.5083,
+ "step": 29850
+ },
+ {
+ "epoch": 0.15426604960246826,
+ "grad_norm": 17598.75390625,
+ "learning_rate": 9.790123693646391e-05,
+ "loss": 0.4968,
+ "step": 29900
+ },
+ {
+ "epoch": 0.15452401958508108,
+ "grad_norm": 19622.521484375,
+ "learning_rate": 9.789075148798915e-05,
+ "loss": 0.4881,
+ "step": 29950
+ },
+ {
+ "epoch": 0.1547819895676939,
+ "grad_norm": 20092.87109375,
+ "learning_rate": 9.78802404763586e-05,
+ "loss": 0.4994,
+ "step": 30000
+ },
+ {
+ "epoch": 0.1547819895676939,
+ "eval_loss": 0.4904574453830719,
+ "eval_runtime": 3267.2095,
+ "eval_samples_per_second": 949.165,
+ "eval_steps_per_second": 1.854,
+ "step": 30000
+ },
+ {
+ "epoch": 0.15503995955030672,
+ "grad_norm": 19136.84765625,
+ "learning_rate": 9.786970390718282e-05,
+ "loss": 0.4963,
+ "step": 30050
+ },
+ {
+ "epoch": 0.15529792953291954,
+ "grad_norm": 20464.998046875,
+ "learning_rate": 9.785914178608603e-05,
+ "loss": 0.4994,
+ "step": 30100
+ },
+ {
+ "epoch": 0.15555589951553236,
+ "grad_norm": 23388.55859375,
+ "learning_rate": 9.784855411870611e-05,
+ "loss": 0.5036,
+ "step": 30150
+ },
+ {
+ "epoch": 0.15581386949814519,
+ "grad_norm": 20002.095703125,
+ "learning_rate": 9.783794091069451e-05,
+ "loss": 0.5021,
+ "step": 30200
+ },
+ {
+ "epoch": 0.156071839480758,
+ "grad_norm": 19565.419921875,
+ "learning_rate": 9.782730216771641e-05,
+ "loss": 0.4929,
+ "step": 30250
+ },
+ {
+ "epoch": 0.15632980946337086,
+ "grad_norm": 20284.173828125,
+ "learning_rate": 9.781663789545052e-05,
+ "loss": 0.4889,
+ "step": 30300
+ },
+ {
+ "epoch": 0.15658777944598368,
+ "grad_norm": 18613.439453125,
+ "learning_rate": 9.780594809958922e-05,
+ "loss": 0.496,
+ "step": 30350
+ },
+ {
+ "epoch": 0.1568457494285965,
+ "grad_norm": 19419.1640625,
+ "learning_rate": 9.779523278583855e-05,
+ "loss": 0.4977,
+ "step": 30400
+ },
+ {
+ "epoch": 0.15710371941120932,
+ "grad_norm": 21695.361328125,
+ "learning_rate": 9.778449195991813e-05,
+ "loss": 0.4998,
+ "step": 30450
+ },
+ {
+ "epoch": 0.15736168939382214,
+ "grad_norm": 21914.3828125,
+ "learning_rate": 9.777372562756117e-05,
+ "loss": 0.4936,
+ "step": 30500
+ },
+ {
+ "epoch": 0.15761965937643496,
+ "grad_norm": 22384.525390625,
+ "learning_rate": 9.776293379451458e-05,
+ "loss": 0.5034,
+ "step": 30550
+ },
+ {
+ "epoch": 0.15787762935904778,
+ "grad_norm": 21174.220703125,
+ "learning_rate": 9.775211646653879e-05,
+ "loss": 0.4928,
+ "step": 30600
+ },
+ {
+ "epoch": 0.1581355993416606,
+ "grad_norm": 19809.953125,
+ "learning_rate": 9.77412736494079e-05,
+ "loss": 0.5014,
+ "step": 30650
+ },
+ {
+ "epoch": 0.15839356932427343,
+ "grad_norm": 19657.048828125,
+ "learning_rate": 9.773040534890958e-05,
+ "loss": 0.5022,
+ "step": 30700
+ },
+ {
+ "epoch": 0.15865153930688625,
+ "grad_norm": 20559.490234375,
+ "learning_rate": 9.771951157084514e-05,
+ "loss": 0.4923,
+ "step": 30750
+ },
+ {
+ "epoch": 0.15890950928949907,
+ "grad_norm": 19473.294921875,
+ "learning_rate": 9.770859232102946e-05,
+ "loss": 0.4991,
+ "step": 30800
+ },
+ {
+ "epoch": 0.1591674792721119,
+ "grad_norm": 19243.509765625,
+ "learning_rate": 9.769764760529102e-05,
+ "loss": 0.4934,
+ "step": 30850
+ },
+ {
+ "epoch": 0.1594254492547247,
+ "grad_norm": 20882.853515625,
+ "learning_rate": 9.768667742947189e-05,
+ "loss": 0.4989,
+ "step": 30900
+ },
+ {
+ "epoch": 0.15968341923733753,
+ "grad_norm": 19654.17578125,
+ "learning_rate": 9.767568179942776e-05,
+ "loss": 0.501,
+ "step": 30950
+ },
+ {
+ "epoch": 0.15994138921995035,
+ "grad_norm": 20069.412109375,
+ "learning_rate": 9.766466072102786e-05,
+ "loss": 0.5001,
+ "step": 31000
+ },
+ {
+ "epoch": 0.1601993592025632,
+ "grad_norm": 19730.416015625,
+ "learning_rate": 9.765361420015506e-05,
+ "loss": 0.4947,
+ "step": 31050
+ },
+ {
+ "epoch": 0.16045732918517602,
+ "grad_norm": 19825.43359375,
+ "learning_rate": 9.764254224270573e-05,
+ "loss": 0.5012,
+ "step": 31100
+ },
+ {
+ "epoch": 0.16071529916778884,
+ "grad_norm": 19111.859375,
+ "learning_rate": 9.763144485458992e-05,
+ "loss": 0.4946,
+ "step": 31150
+ },
+ {
+ "epoch": 0.16097326915040167,
+ "grad_norm": 20071.7578125,
+ "learning_rate": 9.762032204173116e-05,
+ "loss": 0.4961,
+ "step": 31200
+ },
+ {
+ "epoch": 0.1612312391330145,
+ "grad_norm": 18780.638671875,
+ "learning_rate": 9.76091738100666e-05,
+ "loss": 0.4952,
+ "step": 31250
+ },
+ {
+ "epoch": 0.1614892091156273,
+ "grad_norm": 20192.69140625,
+ "learning_rate": 9.759800016554699e-05,
+ "loss": 0.4919,
+ "step": 31300
+ },
+ {
+ "epoch": 0.16174717909824013,
+ "grad_norm": 18430.57421875,
+ "learning_rate": 9.758680111413653e-05,
+ "loss": 0.4953,
+ "step": 31350
+ },
+ {
+ "epoch": 0.16200514908085295,
+ "grad_norm": 18921.740234375,
+ "learning_rate": 9.757557666181314e-05,
+ "loss": 0.5013,
+ "step": 31400
+ },
+ {
+ "epoch": 0.16226311906346577,
+ "grad_norm": 18918.857421875,
+ "learning_rate": 9.756432681456815e-05,
+ "loss": 0.4976,
+ "step": 31450
+ },
+ {
+ "epoch": 0.1625210890460786,
+ "grad_norm": 21373.814453125,
+ "learning_rate": 9.755305157840655e-05,
+ "loss": 0.4975,
+ "step": 31500
+ },
+ {
+ "epoch": 0.16277905902869141,
+ "grad_norm": 19509.482421875,
+ "learning_rate": 9.754175095934684e-05,
+ "loss": 0.4966,
+ "step": 31550
+ },
+ {
+ "epoch": 0.16303702901130424,
+ "grad_norm": 18362.125,
+ "learning_rate": 9.753042496342103e-05,
+ "loss": 0.505,
+ "step": 31600
+ },
+ {
+ "epoch": 0.16329499899391706,
+ "grad_norm": 20344.11328125,
+ "learning_rate": 9.751907359667476e-05,
+ "loss": 0.4988,
+ "step": 31650
+ },
+ {
+ "epoch": 0.16355296897652988,
+ "grad_norm": 21398.97265625,
+ "learning_rate": 9.750769686516715e-05,
+ "loss": 0.493,
+ "step": 31700
+ },
+ {
+ "epoch": 0.16381093895914273,
+ "grad_norm": 21106.955078125,
+ "learning_rate": 9.74962947749709e-05,
+ "loss": 0.4999,
+ "step": 31750
+ },
+ {
+ "epoch": 0.16406890894175555,
+ "grad_norm": 19787.216796875,
+ "learning_rate": 9.74848673321722e-05,
+ "loss": 0.4932,
+ "step": 31800
+ },
+ {
+ "epoch": 0.16432687892436837,
+ "grad_norm": 19198.83984375,
+ "learning_rate": 9.747341454287082e-05,
+ "loss": 0.4919,
+ "step": 31850
+ },
+ {
+ "epoch": 0.1645848489069812,
+ "grad_norm": 18460.92578125,
+ "learning_rate": 9.746193641318002e-05,
+ "loss": 0.495,
+ "step": 31900
+ },
+ {
+ "epoch": 0.164842818889594,
+ "grad_norm": 18591.427734375,
+ "learning_rate": 9.74504329492266e-05,
+ "loss": 0.4888,
+ "step": 31950
+ },
+ {
+ "epoch": 0.16510078887220683,
+ "grad_norm": 21651.3515625,
+ "learning_rate": 9.743890415715091e-05,
+ "loss": 0.4909,
+ "step": 32000
+ },
+ {
+ "epoch": 0.16535875885481965,
+ "grad_norm": 18884.486328125,
+ "learning_rate": 9.742735004310677e-05,
+ "loss": 0.4981,
+ "step": 32050
+ },
+ {
+ "epoch": 0.16561672883743248,
+ "grad_norm": 19223.658203125,
+ "learning_rate": 9.741577061326157e-05,
+ "loss": 0.4961,
+ "step": 32100
+ },
+ {
+ "epoch": 0.1658746988200453,
+ "grad_norm": 18266.560546875,
+ "learning_rate": 9.740416587379615e-05,
+ "loss": 0.4914,
+ "step": 32150
+ },
+ {
+ "epoch": 0.16613266880265812,
+ "grad_norm": 19871.509765625,
+ "learning_rate": 9.739253583090493e-05,
+ "loss": 0.499,
+ "step": 32200
+ },
+ {
+ "epoch": 0.16639063878527094,
+ "grad_norm": 19524.298828125,
+ "learning_rate": 9.738088049079577e-05,
+ "loss": 0.4944,
+ "step": 32250
+ },
+ {
+ "epoch": 0.16664860876788376,
+ "grad_norm": 20308.685546875,
+ "learning_rate": 9.73691998596901e-05,
+ "loss": 0.4941,
+ "step": 32300
+ },
+ {
+ "epoch": 0.16690657875049658,
+ "grad_norm": 19125.52734375,
+ "learning_rate": 9.735749394382278e-05,
+ "loss": 0.4968,
+ "step": 32350
+ },
+ {
+ "epoch": 0.1671645487331094,
+ "grad_norm": 18792.716796875,
+ "learning_rate": 9.734576274944223e-05,
+ "loss": 0.4959,
+ "step": 32400
+ },
+ {
+ "epoch": 0.16742251871572225,
+ "grad_norm": 18521.54296875,
+ "learning_rate": 9.73340062828103e-05,
+ "loss": 0.4913,
+ "step": 32450
+ },
+ {
+ "epoch": 0.16768048869833507,
+ "grad_norm": 19540.41796875,
+ "learning_rate": 9.732222455020241e-05,
+ "loss": 0.4999,
+ "step": 32500
+ },
+ {
+ "epoch": 0.1679384586809479,
+ "grad_norm": 18682.84375,
+ "learning_rate": 9.73104175579074e-05,
+ "loss": 0.4991,
+ "step": 32550
+ },
+ {
+ "epoch": 0.16819642866356072,
+ "grad_norm": 20134.8984375,
+ "learning_rate": 9.72985853122276e-05,
+ "loss": 0.4839,
+ "step": 32600
+ },
+ {
+ "epoch": 0.16845439864617354,
+ "grad_norm": 20375.1484375,
+ "learning_rate": 9.728672781947883e-05,
+ "loss": 0.4941,
+ "step": 32650
+ },
+ {
+ "epoch": 0.16871236862878636,
+ "grad_norm": 19720.98046875,
+ "learning_rate": 9.727484508599042e-05,
+ "loss": 0.4856,
+ "step": 32700
+ },
+ {
+ "epoch": 0.16897033861139918,
+ "grad_norm": 19408.7734375,
+ "learning_rate": 9.726293711810513e-05,
+ "loss": 0.4942,
+ "step": 32750
+ },
+ {
+ "epoch": 0.169228308594012,
+ "grad_norm": 20136.892578125,
+ "learning_rate": 9.725100392217919e-05,
+ "loss": 0.4942,
+ "step": 32800
+ },
+ {
+ "epoch": 0.16948627857662482,
+ "grad_norm": 20555.27734375,
+ "learning_rate": 9.723904550458232e-05,
+ "loss": 0.4907,
+ "step": 32850
+ },
+ {
+ "epoch": 0.16974424855923764,
+ "grad_norm": 18876.787109375,
+ "learning_rate": 9.722706187169769e-05,
+ "loss": 0.4951,
+ "step": 32900
+ },
+ {
+ "epoch": 0.17000221854185046,
+ "grad_norm": 19918.4296875,
+ "learning_rate": 9.721505302992194e-05,
+ "loss": 0.4871,
+ "step": 32950
+ },
+ {
+ "epoch": 0.17026018852446329,
+ "grad_norm": 18593.453125,
+ "learning_rate": 9.720301898566513e-05,
+ "loss": 0.4889,
+ "step": 33000
+ },
+ {
+ "epoch": 0.1705181585070761,
+ "grad_norm": 21007.5625,
+ "learning_rate": 9.719095974535084e-05,
+ "loss": 0.4936,
+ "step": 33050
+ },
+ {
+ "epoch": 0.17077612848968893,
+ "grad_norm": 21749.849609375,
+ "learning_rate": 9.717887531541601e-05,
+ "loss": 0.4915,
+ "step": 33100
+ },
+ {
+ "epoch": 0.17103409847230178,
+ "grad_norm": 19097.896484375,
+ "learning_rate": 9.716676570231114e-05,
+ "loss": 0.4857,
+ "step": 33150
+ },
+ {
+ "epoch": 0.1712920684549146,
+ "grad_norm": 18509.107421875,
+ "learning_rate": 9.715463091250003e-05,
+ "loss": 0.487,
+ "step": 33200
+ },
+ {
+ "epoch": 0.17155003843752742,
+ "grad_norm": 21414.916015625,
+ "learning_rate": 9.714247095246007e-05,
+ "loss": 0.4929,
+ "step": 33250
+ },
+ {
+ "epoch": 0.17180800842014024,
+ "grad_norm": 19836.978515625,
+ "learning_rate": 9.713028582868196e-05,
+ "loss": 0.4948,
+ "step": 33300
+ },
+ {
+ "epoch": 0.17206597840275306,
+ "grad_norm": 18013.787109375,
+ "learning_rate": 9.71180755476699e-05,
+ "loss": 0.4945,
+ "step": 33350
+ },
+ {
+ "epoch": 0.17232394838536588,
+ "grad_norm": 18498.1640625,
+ "learning_rate": 9.71058401159415e-05,
+ "loss": 0.4961,
+ "step": 33400
+ },
+ {
+ "epoch": 0.1725819183679787,
+ "grad_norm": 19871.404296875,
+ "learning_rate": 9.709357954002778e-05,
+ "loss": 0.4896,
+ "step": 33450
+ },
+ {
+ "epoch": 0.17283988835059153,
+ "grad_norm": 20794.05859375,
+ "learning_rate": 9.708129382647324e-05,
+ "loss": 0.4855,
+ "step": 33500
+ },
+ {
+ "epoch": 0.17309785833320435,
+ "grad_norm": 19775.6328125,
+ "learning_rate": 9.706898298183573e-05,
+ "loss": 0.4899,
+ "step": 33550
+ },
+ {
+ "epoch": 0.17335582831581717,
+ "grad_norm": 24329.740234375,
+ "learning_rate": 9.705664701268652e-05,
+ "loss": 0.4879,
+ "step": 33600
+ },
+ {
+ "epoch": 0.17361379829843,
+ "grad_norm": 19666.697265625,
+ "learning_rate": 9.704428592561037e-05,
+ "loss": 0.493,
+ "step": 33650
+ },
+ {
+ "epoch": 0.1738717682810428,
+ "grad_norm": 20382.115234375,
+ "learning_rate": 9.703189972720532e-05,
+ "loss": 0.4922,
+ "step": 33700
+ },
+ {
+ "epoch": 0.17412973826365563,
+ "grad_norm": 20240.46875,
+ "learning_rate": 9.701948842408293e-05,
+ "loss": 0.4908,
+ "step": 33750
+ },
+ {
+ "epoch": 0.17438770824626845,
+ "grad_norm": 18531.224609375,
+ "learning_rate": 9.700705202286811e-05,
+ "loss": 0.489,
+ "step": 33800
+ },
+ {
+ "epoch": 0.17464567822888127,
+ "grad_norm": 19121.0625,
+ "learning_rate": 9.699459053019912e-05,
+ "loss": 0.4884,
+ "step": 33850
+ },
+ {
+ "epoch": 0.17490364821149412,
+ "grad_norm": 19959.931640625,
+ "learning_rate": 9.698210395272773e-05,
+ "loss": 0.4912,
+ "step": 33900
+ },
+ {
+ "epoch": 0.17516161819410694,
+ "grad_norm": 18255.732421875,
+ "learning_rate": 9.696959229711901e-05,
+ "loss": 0.4888,
+ "step": 33950
+ },
+ {
+ "epoch": 0.17541958817671977,
+ "grad_norm": 21808.8671875,
+ "learning_rate": 9.695705557005142e-05,
+ "loss": 0.4945,
+ "step": 34000
+ },
+ {
+ "epoch": 0.1756775581593326,
+ "grad_norm": 18687.521484375,
+ "learning_rate": 9.694449377821685e-05,
+ "loss": 0.4891,
+ "step": 34050
+ },
+ {
+ "epoch": 0.1759355281419454,
+ "grad_norm": 18309.859375,
+ "learning_rate": 9.693190692832053e-05,
+ "loss": 0.4888,
+ "step": 34100
+ },
+ {
+ "epoch": 0.17619349812455823,
+ "grad_norm": 19453.705078125,
+ "learning_rate": 9.691929502708106e-05,
+ "loss": 0.4852,
+ "step": 34150
+ },
+ {
+ "epoch": 0.17645146810717105,
+ "grad_norm": 20964.595703125,
+ "learning_rate": 9.690665808123046e-05,
+ "loss": 0.4931,
+ "step": 34200
+ },
+ {
+ "epoch": 0.17670943808978387,
+ "grad_norm": 20170.5703125,
+ "learning_rate": 9.689399609751405e-05,
+ "loss": 0.4908,
+ "step": 34250
+ },
+ {
+ "epoch": 0.1769674080723967,
+ "grad_norm": 18276.19140625,
+ "learning_rate": 9.688130908269058e-05,
+ "loss": 0.4906,
+ "step": 34300
+ },
+ {
+ "epoch": 0.1772253780550095,
+ "grad_norm": 21062.56640625,
+ "learning_rate": 9.686859704353212e-05,
+ "loss": 0.4911,
+ "step": 34350
+ },
+ {
+ "epoch": 0.17748334803762233,
+ "grad_norm": 21678.6796875,
+ "learning_rate": 9.685585998682414e-05,
+ "loss": 0.4894,
+ "step": 34400
+ },
+ {
+ "epoch": 0.17774131802023516,
+ "grad_norm": 17795.384765625,
+ "learning_rate": 9.684309791936539e-05,
+ "loss": 0.4893,
+ "step": 34450
+ },
+ {
+ "epoch": 0.17799928800284798,
+ "grad_norm": 21536.837890625,
+ "learning_rate": 9.683031084796803e-05,
+ "loss": 0.4889,
+ "step": 34500
+ },
+ {
+ "epoch": 0.1782572579854608,
+ "grad_norm": 20554.423828125,
+ "learning_rate": 9.681749877945756e-05,
+ "loss": 0.4843,
+ "step": 34550
+ },
+ {
+ "epoch": 0.17851522796807365,
+ "grad_norm": 22045.376953125,
+ "learning_rate": 9.680466172067282e-05,
+ "loss": 0.4895,
+ "step": 34600
+ },
+ {
+ "epoch": 0.17877319795068647,
+ "grad_norm": 21406.853515625,
+ "learning_rate": 9.679179967846597e-05,
+ "loss": 0.4914,
+ "step": 34650
+ },
+ {
+ "epoch": 0.1790311679332993,
+ "grad_norm": 20971.037109375,
+ "learning_rate": 9.677891265970252e-05,
+ "loss": 0.485,
+ "step": 34700
+ },
+ {
+ "epoch": 0.1792891379159121,
+ "grad_norm": 20256.73828125,
+ "learning_rate": 9.676600067126129e-05,
+ "loss": 0.4918,
+ "step": 34750
+ },
+ {
+ "epoch": 0.17954710789852493,
+ "grad_norm": 19123.048828125,
+ "learning_rate": 9.67530637200345e-05,
+ "loss": 0.49,
+ "step": 34800
+ },
+ {
+ "epoch": 0.17980507788113775,
+ "grad_norm": 20799.748046875,
+ "learning_rate": 9.674010181292761e-05,
+ "loss": 0.4889,
+ "step": 34850
+ },
+ {
+ "epoch": 0.18006304786375057,
+ "grad_norm": 19569.609375,
+ "learning_rate": 9.672711495685945e-05,
+ "loss": 0.4882,
+ "step": 34900
+ },
+ {
+ "epoch": 0.1803210178463634,
+ "grad_norm": 18339.76171875,
+ "learning_rate": 9.671410315876213e-05,
+ "loss": 0.4884,
+ "step": 34950
+ },
+ {
+ "epoch": 0.18057898782897622,
+ "grad_norm": 20066.099609375,
+ "learning_rate": 9.670106642558111e-05,
+ "loss": 0.4866,
+ "step": 35000
+ },
+ {
+ "epoch": 0.18057898782897622,
+ "eval_loss": 0.48020538687705994,
+ "eval_runtime": 3265.3619,
+ "eval_samples_per_second": 949.702,
+ "eval_steps_per_second": 1.855,
+ "step": 35000
+ },
+ {
+ "epoch": 0.18083695781158904,
+ "grad_norm": 18703.037109375,
+ "learning_rate": 9.668800476427515e-05,
+ "loss": 0.4953,
+ "step": 35050
+ },
+ {
+ "epoch": 0.18109492779420186,
+ "grad_norm": 19886.177734375,
+ "learning_rate": 9.667491818181631e-05,
+ "loss": 0.4845,
+ "step": 35100
+ },
+ {
+ "epoch": 0.18135289777681468,
+ "grad_norm": 19349.08203125,
+ "learning_rate": 9.666180668518993e-05,
+ "loss": 0.493,
+ "step": 35150
+ },
+ {
+ "epoch": 0.1816108677594275,
+ "grad_norm": 19786.404296875,
+ "learning_rate": 9.664867028139473e-05,
+ "loss": 0.4815,
+ "step": 35200
+ },
+ {
+ "epoch": 0.18186883774204032,
+ "grad_norm": 21271.05859375,
+ "learning_rate": 9.66355089774426e-05,
+ "loss": 0.4907,
+ "step": 35250
+ },
+ {
+ "epoch": 0.18212680772465317,
+ "grad_norm": 19096.3125,
+ "learning_rate": 9.662232278035885e-05,
+ "loss": 0.4865,
+ "step": 35300
+ },
+ {
+ "epoch": 0.182384777707266,
+ "grad_norm": 20136.935546875,
+ "learning_rate": 9.660911169718196e-05,
+ "loss": 0.4824,
+ "step": 35350
+ },
+ {
+ "epoch": 0.18264274768987881,
+ "grad_norm": 19532.361328125,
+ "learning_rate": 9.65958757349638e-05,
+ "loss": 0.4857,
+ "step": 35400
+ },
+ {
+ "epoch": 0.18290071767249164,
+ "grad_norm": 18227.626953125,
+ "learning_rate": 9.658261490076944e-05,
+ "loss": 0.4871,
+ "step": 35450
+ },
+ {
+ "epoch": 0.18315868765510446,
+ "grad_norm": 21021.564453125,
+ "learning_rate": 9.656932920167727e-05,
+ "loss": 0.485,
+ "step": 35500
+ },
+ {
+ "epoch": 0.18341665763771728,
+ "grad_norm": 19943.9765625,
+ "learning_rate": 9.655601864477893e-05,
+ "loss": 0.4908,
+ "step": 35550
+ },
+ {
+ "epoch": 0.1836746276203301,
+ "grad_norm": 19356.8203125,
+ "learning_rate": 9.654268323717934e-05,
+ "loss": 0.4849,
+ "step": 35600
+ },
+ {
+ "epoch": 0.18393259760294292,
+ "grad_norm": 19431.9453125,
+ "learning_rate": 9.652932298599671e-05,
+ "loss": 0.4927,
+ "step": 35650
+ },
+ {
+ "epoch": 0.18419056758555574,
+ "grad_norm": 18860.0625,
+ "learning_rate": 9.651593789836242e-05,
+ "loss": 0.4879,
+ "step": 35700
+ },
+ {
+ "epoch": 0.18444853756816856,
+ "grad_norm": 18524.46875,
+ "learning_rate": 9.650252798142123e-05,
+ "loss": 0.4877,
+ "step": 35750
+ },
+ {
+ "epoch": 0.18470650755078138,
+ "grad_norm": 18897.322265625,
+ "learning_rate": 9.648909324233107e-05,
+ "loss": 0.4906,
+ "step": 35800
+ },
+ {
+ "epoch": 0.1849644775333942,
+ "grad_norm": 21080.552734375,
+ "learning_rate": 9.647563368826313e-05,
+ "loss": 0.4895,
+ "step": 35850
+ },
+ {
+ "epoch": 0.18522244751600703,
+ "grad_norm": 20014.828125,
+ "learning_rate": 9.64621493264019e-05,
+ "loss": 0.4816,
+ "step": 35900
+ },
+ {
+ "epoch": 0.18548041749861985,
+ "grad_norm": 19470.3984375,
+ "learning_rate": 9.644864016394504e-05,
+ "loss": 0.4812,
+ "step": 35950
+ },
+ {
+ "epoch": 0.1857383874812327,
+ "grad_norm": 21915.400390625,
+ "learning_rate": 9.643510620810348e-05,
+ "loss": 0.4859,
+ "step": 36000
+ },
+ {
+ "epoch": 0.18599635746384552,
+ "grad_norm": 19367.009765625,
+ "learning_rate": 9.642154746610139e-05,
+ "loss": 0.4905,
+ "step": 36050
+ },
+ {
+ "epoch": 0.18625432744645834,
+ "grad_norm": 18379.70703125,
+ "learning_rate": 9.640796394517616e-05,
+ "loss": 0.4878,
+ "step": 36100
+ },
+ {
+ "epoch": 0.18651229742907116,
+ "grad_norm": 18933.455078125,
+ "learning_rate": 9.639435565257842e-05,
+ "loss": 0.4877,
+ "step": 36150
+ },
+ {
+ "epoch": 0.18677026741168398,
+ "grad_norm": 19026.484375,
+ "learning_rate": 9.638072259557201e-05,
+ "loss": 0.4873,
+ "step": 36200
+ },
+ {
+ "epoch": 0.1870282373942968,
+ "grad_norm": 21111.09375,
+ "learning_rate": 9.636706478143398e-05,
+ "loss": 0.4815,
+ "step": 36250
+ },
+ {
+ "epoch": 0.18728620737690962,
+ "grad_norm": 19362.541015625,
+ "learning_rate": 9.635338221745462e-05,
+ "loss": 0.4854,
+ "step": 36300
+ },
+ {
+ "epoch": 0.18754417735952245,
+ "grad_norm": 19861.58984375,
+ "learning_rate": 9.63396749109374e-05,
+ "loss": 0.4832,
+ "step": 36350
+ },
+ {
+ "epoch": 0.18780214734213527,
+ "grad_norm": 18793.623046875,
+ "learning_rate": 9.632594286919905e-05,
+ "loss": 0.4811,
+ "step": 36400
+ },
+ {
+ "epoch": 0.1880601173247481,
+ "grad_norm": 20452.26953125,
+ "learning_rate": 9.631218609956943e-05,
+ "loss": 0.4872,
+ "step": 36450
+ },
+ {
+ "epoch": 0.1883180873073609,
+ "grad_norm": 19237.203125,
+ "learning_rate": 9.629840460939165e-05,
+ "loss": 0.4941,
+ "step": 36500
+ },
+ {
+ "epoch": 0.18857605728997373,
+ "grad_norm": 19828.84765625,
+ "learning_rate": 9.628459840602202e-05,
+ "loss": 0.4869,
+ "step": 36550
+ },
+ {
+ "epoch": 0.18883402727258655,
+ "grad_norm": 18171.08203125,
+ "learning_rate": 9.627076749683e-05,
+ "loss": 0.4915,
+ "step": 36600
+ },
+ {
+ "epoch": 0.18909199725519937,
+ "grad_norm": 21346.9375,
+ "learning_rate": 9.625691188919827e-05,
+ "loss": 0.4913,
+ "step": 36650
+ },
+ {
+ "epoch": 0.1893499672378122,
+ "grad_norm": 20066.7890625,
+ "learning_rate": 9.62430315905227e-05,
+ "loss": 0.4809,
+ "step": 36700
+ },
+ {
+ "epoch": 0.18960793722042504,
+ "grad_norm": 20736.546875,
+ "learning_rate": 9.622912660821231e-05,
+ "loss": 0.4849,
+ "step": 36750
+ },
+ {
+ "epoch": 0.18986590720303786,
+ "grad_norm": 20891.958984375,
+ "learning_rate": 9.62151969496893e-05,
+ "loss": 0.4831,
+ "step": 36800
+ },
+ {
+ "epoch": 0.19012387718565069,
+ "grad_norm": 21394.1953125,
+ "learning_rate": 9.620124262238908e-05,
+ "loss": 0.4855,
+ "step": 36850
+ },
+ {
+ "epoch": 0.1903818471682635,
+ "grad_norm": 19725.89453125,
+ "learning_rate": 9.618726363376016e-05,
+ "loss": 0.48,
+ "step": 36900
+ },
+ {
+ "epoch": 0.19063981715087633,
+ "grad_norm": 21622.78125,
+ "learning_rate": 9.617325999126429e-05,
+ "loss": 0.4832,
+ "step": 36950
+ },
+ {
+ "epoch": 0.19089778713348915,
+ "grad_norm": 22529.548828125,
+ "learning_rate": 9.615923170237633e-05,
+ "loss": 0.4852,
+ "step": 37000
+ },
+ {
+ "epoch": 0.19115575711610197,
+ "grad_norm": 21136.404296875,
+ "learning_rate": 9.614517877458428e-05,
+ "loss": 0.4816,
+ "step": 37050
+ },
+ {
+ "epoch": 0.1914137270987148,
+ "grad_norm": 19039.330078125,
+ "learning_rate": 9.61311012153894e-05,
+ "loss": 0.4835,
+ "step": 37100
+ },
+ {
+ "epoch": 0.1916716970813276,
+ "grad_norm": 19755.974609375,
+ "learning_rate": 9.611699903230594e-05,
+ "loss": 0.4846,
+ "step": 37150
+ },
+ {
+ "epoch": 0.19192966706394043,
+ "grad_norm": 19061.28515625,
+ "learning_rate": 9.610287223286139e-05,
+ "loss": 0.4816,
+ "step": 37200
+ },
+ {
+ "epoch": 0.19218763704655326,
+ "grad_norm": 21649.275390625,
+ "learning_rate": 9.608872082459639e-05,
+ "loss": 0.4837,
+ "step": 37250
+ },
+ {
+ "epoch": 0.19244560702916608,
+ "grad_norm": 19856.759765625,
+ "learning_rate": 9.607454481506466e-05,
+ "loss": 0.4848,
+ "step": 37300
+ },
+ {
+ "epoch": 0.1927035770117789,
+ "grad_norm": 19442.810546875,
+ "learning_rate": 9.60603442118331e-05,
+ "loss": 0.4828,
+ "step": 37350
+ },
+ {
+ "epoch": 0.19296154699439172,
+ "grad_norm": 20076.44140625,
+ "learning_rate": 9.604611902248168e-05,
+ "loss": 0.4896,
+ "step": 37400
+ },
+ {
+ "epoch": 0.19321951697700457,
+ "grad_norm": 18413.908203125,
+ "learning_rate": 9.603186925460359e-05,
+ "loss": 0.4806,
+ "step": 37450
+ },
+ {
+ "epoch": 0.1934774869596174,
+ "grad_norm": 19618.3984375,
+ "learning_rate": 9.601759491580503e-05,
+ "loss": 0.4864,
+ "step": 37500
+ },
+ {
+ "epoch": 0.1937354569422302,
+ "grad_norm": 20347.177734375,
+ "learning_rate": 9.600329601370539e-05,
+ "loss": 0.489,
+ "step": 37550
+ },
+ {
+ "epoch": 0.19399342692484303,
+ "grad_norm": 19288.380859375,
+ "learning_rate": 9.598897255593713e-05,
+ "loss": 0.4829,
+ "step": 37600
+ },
+ {
+ "epoch": 0.19425139690745585,
+ "grad_norm": 20326.1484375,
+ "learning_rate": 9.597462455014585e-05,
+ "loss": 0.4856,
+ "step": 37650
+ },
+ {
+ "epoch": 0.19450936689006867,
+ "grad_norm": 19598.14453125,
+ "learning_rate": 9.596025200399024e-05,
+ "loss": 0.4831,
+ "step": 37700
+ },
+ {
+ "epoch": 0.1947673368726815,
+ "grad_norm": 20041.28125,
+ "learning_rate": 9.594585492514205e-05,
+ "loss": 0.4822,
+ "step": 37750
+ },
+ {
+ "epoch": 0.19502530685529432,
+ "grad_norm": 20853.201171875,
+ "learning_rate": 9.593143332128623e-05,
+ "loss": 0.4874,
+ "step": 37800
+ },
+ {
+ "epoch": 0.19528327683790714,
+ "grad_norm": 21364.455078125,
+ "learning_rate": 9.591698720012068e-05,
+ "loss": 0.482,
+ "step": 37850
+ },
+ {
+ "epoch": 0.19554124682051996,
+ "grad_norm": 18795.447265625,
+ "learning_rate": 9.590251656935652e-05,
+ "loss": 0.489,
+ "step": 37900
+ },
+ {
+ "epoch": 0.19579921680313278,
+ "grad_norm": 23039.455078125,
+ "learning_rate": 9.588802143671784e-05,
+ "loss": 0.4879,
+ "step": 37950
+ },
+ {
+ "epoch": 0.1960571867857456,
+ "grad_norm": 19842.263671875,
+ "learning_rate": 9.58735018099419e-05,
+ "loss": 0.4869,
+ "step": 38000
+ },
+ {
+ "epoch": 0.19631515676835842,
+ "grad_norm": 21241.00390625,
+ "learning_rate": 9.585895769677897e-05,
+ "loss": 0.4746,
+ "step": 38050
+ },
+ {
+ "epoch": 0.19657312675097124,
+ "grad_norm": 19803.2265625,
+ "learning_rate": 9.584438910499245e-05,
+ "loss": 0.4824,
+ "step": 38100
+ },
+ {
+ "epoch": 0.1968310967335841,
+ "grad_norm": 18873.744140625,
+ "learning_rate": 9.582979604235873e-05,
+ "loss": 0.4817,
+ "step": 38150
+ },
+ {
+ "epoch": 0.19708906671619691,
+ "grad_norm": 19128.8828125,
+ "learning_rate": 9.581517851666734e-05,
+ "loss": 0.482,
+ "step": 38200
+ },
+ {
+ "epoch": 0.19734703669880974,
+ "grad_norm": 20514.16796875,
+ "learning_rate": 9.580053653572081e-05,
+ "loss": 0.4781,
+ "step": 38250
+ },
+ {
+ "epoch": 0.19760500668142256,
+ "grad_norm": 19135.58984375,
+ "learning_rate": 9.578587010733475e-05,
+ "loss": 0.4815,
+ "step": 38300
+ },
+ {
+ "epoch": 0.19786297666403538,
+ "grad_norm": 22849.197265625,
+ "learning_rate": 9.577117923933782e-05,
+ "loss": 0.4794,
+ "step": 38350
+ },
+ {
+ "epoch": 0.1981209466466482,
+ "grad_norm": 21278.736328125,
+ "learning_rate": 9.575646393957173e-05,
+ "loss": 0.4832,
+ "step": 38400
+ },
+ {
+ "epoch": 0.19837891662926102,
+ "grad_norm": 19292.162109375,
+ "learning_rate": 9.57417242158912e-05,
+ "loss": 0.4876,
+ "step": 38450
+ },
+ {
+ "epoch": 0.19863688661187384,
+ "grad_norm": 17778.423828125,
+ "learning_rate": 9.572696007616402e-05,
+ "loss": 0.4842,
+ "step": 38500
+ },
+ {
+ "epoch": 0.19889485659448666,
+ "grad_norm": 18855.140625,
+ "learning_rate": 9.5712171528271e-05,
+ "loss": 0.4846,
+ "step": 38550
+ },
+ {
+ "epoch": 0.19915282657709948,
+ "grad_norm": 21640.8203125,
+ "learning_rate": 9.5697358580106e-05,
+ "loss": 0.4829,
+ "step": 38600
+ },
+ {
+ "epoch": 0.1994107965597123,
+ "grad_norm": 19358.3828125,
+ "learning_rate": 9.568252123957586e-05,
+ "loss": 0.4806,
+ "step": 38650
+ },
+ {
+ "epoch": 0.19966876654232513,
+ "grad_norm": 20781.98828125,
+ "learning_rate": 9.566765951460046e-05,
+ "loss": 0.4849,
+ "step": 38700
+ },
+ {
+ "epoch": 0.19992673652493795,
+ "grad_norm": 20604.7265625,
+ "learning_rate": 9.565277341311271e-05,
+ "loss": 0.4856,
+ "step": 38750
+ },
+ {
+ "epoch": 0.20018470650755077,
+ "grad_norm": 20930.048828125,
+ "learning_rate": 9.563786294305854e-05,
+ "loss": 0.4812,
+ "step": 38800
+ },
+ {
+ "epoch": 0.20044267649016362,
+ "grad_norm": 22721.259765625,
+ "learning_rate": 9.562292811239686e-05,
+ "loss": 0.4857,
+ "step": 38850
+ },
+ {
+ "epoch": 0.20070064647277644,
+ "grad_norm": 19667.57421875,
+ "learning_rate": 9.560796892909957e-05,
+ "loss": 0.483,
+ "step": 38900
+ },
+ {
+ "epoch": 0.20095861645538926,
+ "grad_norm": 18259.19140625,
+ "learning_rate": 9.559298540115164e-05,
+ "loss": 0.4851,
+ "step": 38950
+ },
+ {
+ "epoch": 0.20121658643800208,
+ "grad_norm": 20980.18359375,
+ "learning_rate": 9.557797753655096e-05,
+ "loss": 0.4815,
+ "step": 39000
+ },
+ {
+ "epoch": 0.2014745564206149,
+ "grad_norm": 19840.025390625,
+ "learning_rate": 9.556294534330841e-05,
+ "loss": 0.4878,
+ "step": 39050
+ },
+ {
+ "epoch": 0.20173252640322772,
+ "grad_norm": 20406.69921875,
+ "learning_rate": 9.554788882944792e-05,
+ "loss": 0.481,
+ "step": 39100
+ },
+ {
+ "epoch": 0.20199049638584055,
+ "grad_norm": 19177.447265625,
+ "learning_rate": 9.553280800300637e-05,
+ "loss": 0.4857,
+ "step": 39150
+ },
+ {
+ "epoch": 0.20224846636845337,
+ "grad_norm": 21242.21875,
+ "learning_rate": 9.551770287203359e-05,
+ "loss": 0.4889,
+ "step": 39200
+ },
+ {
+ "epoch": 0.2025064363510662,
+ "grad_norm": 19343.58203125,
+ "learning_rate": 9.550257344459241e-05,
+ "loss": 0.482,
+ "step": 39250
+ },
+ {
+ "epoch": 0.202764406333679,
+ "grad_norm": 21327.587890625,
+ "learning_rate": 9.548741972875863e-05,
+ "loss": 0.4802,
+ "step": 39300
+ },
+ {
+ "epoch": 0.20302237631629183,
+ "grad_norm": 21366.98828125,
+ "learning_rate": 9.547224173262102e-05,
+ "loss": 0.4779,
+ "step": 39350
+ },
+ {
+ "epoch": 0.20328034629890465,
+ "grad_norm": 20876.39453125,
+ "learning_rate": 9.545703946428128e-05,
+ "loss": 0.4843,
+ "step": 39400
+ },
+ {
+ "epoch": 0.20353831628151747,
+ "grad_norm": 21280.873046875,
+ "learning_rate": 9.544181293185413e-05,
+ "loss": 0.4805,
+ "step": 39450
+ },
+ {
+ "epoch": 0.2037962862641303,
+ "grad_norm": 19546.134765625,
+ "learning_rate": 9.542656214346713e-05,
+ "loss": 0.4753,
+ "step": 39500
+ },
+ {
+ "epoch": 0.20405425624674312,
+ "grad_norm": 19179.05859375,
+ "learning_rate": 9.541128710726091e-05,
+ "loss": 0.4812,
+ "step": 39550
+ },
+ {
+ "epoch": 0.20431222622935596,
+ "grad_norm": 23525.50390625,
+ "learning_rate": 9.539598783138897e-05,
+ "loss": 0.4843,
+ "step": 39600
+ },
+ {
+ "epoch": 0.20457019621196879,
+ "grad_norm": 19369.103515625,
+ "learning_rate": 9.538066432401775e-05,
+ "loss": 0.4788,
+ "step": 39650
+ },
+ {
+ "epoch": 0.2048281661945816,
+ "grad_norm": 20777.119140625,
+ "learning_rate": 9.536531659332667e-05,
+ "loss": 0.4779,
+ "step": 39700
+ },
+ {
+ "epoch": 0.20508613617719443,
+ "grad_norm": 18987.701171875,
+ "learning_rate": 9.534994464750806e-05,
+ "loss": 0.4807,
+ "step": 39750
+ },
+ {
+ "epoch": 0.20534410615980725,
+ "grad_norm": 19523.873046875,
+ "learning_rate": 9.533454849476712e-05,
+ "loss": 0.4798,
+ "step": 39800
+ },
+ {
+ "epoch": 0.20560207614242007,
+ "grad_norm": 21302.05859375,
+ "learning_rate": 9.531912814332206e-05,
+ "loss": 0.4811,
+ "step": 39850
+ },
+ {
+ "epoch": 0.2058600461250329,
+ "grad_norm": 21545.626953125,
+ "learning_rate": 9.530368360140394e-05,
+ "loss": 0.4814,
+ "step": 39900
+ },
+ {
+ "epoch": 0.2061180161076457,
+ "grad_norm": 22709.7265625,
+ "learning_rate": 9.528821487725678e-05,
+ "loss": 0.4827,
+ "step": 39950
+ },
+ {
+ "epoch": 0.20637598609025853,
+ "grad_norm": 20853.228515625,
+ "learning_rate": 9.527272197913746e-05,
+ "loss": 0.4838,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20637598609025853,
+ "eval_loss": 0.47092095017433167,
+ "eval_runtime": 3339.7722,
+ "eval_samples_per_second": 928.542,
+ "eval_steps_per_second": 1.814,
+ "step": 40000
+ },
+ {
+ "epoch": 0.20663395607287136,
+ "grad_norm": 18389.748046875,
+ "learning_rate": 9.525720491531581e-05,
+ "loss": 0.4809,
+ "step": 40050
+ },
+ {
+ "epoch": 0.20689192605548418,
+ "grad_norm": 20328.59765625,
+ "learning_rate": 9.524166369407453e-05,
+ "loss": 0.4827,
+ "step": 40100
+ },
+ {
+ "epoch": 0.207149896038097,
+ "grad_norm": 21094.966796875,
+ "learning_rate": 9.522609832370924e-05,
+ "loss": 0.484,
+ "step": 40150
+ },
+ {
+ "epoch": 0.20740786602070982,
+ "grad_norm": 22630.64453125,
+ "learning_rate": 9.52105088125284e-05,
+ "loss": 0.4829,
+ "step": 40200
+ },
+ {
+ "epoch": 0.20766583600332264,
+ "grad_norm": 19477.7265625,
+ "learning_rate": 9.51948951688534e-05,
+ "loss": 0.4793,
+ "step": 40250
+ },
+ {
+ "epoch": 0.2079238059859355,
+ "grad_norm": 20242.53125,
+ "learning_rate": 9.517925740101851e-05,
+ "loss": 0.4797,
+ "step": 40300
+ },
+ {
+ "epoch": 0.2081817759685483,
+ "grad_norm": 19952.421875,
+ "learning_rate": 9.516359551737087e-05,
+ "loss": 0.4785,
+ "step": 40350
+ },
+ {
+ "epoch": 0.20843974595116113,
+ "grad_norm": 19216.220703125,
+ "learning_rate": 9.514790952627049e-05,
+ "loss": 0.4753,
+ "step": 40400
+ },
+ {
+ "epoch": 0.20869771593377395,
+ "grad_norm": 20297.515625,
+ "learning_rate": 9.513219943609024e-05,
+ "loss": 0.4792,
+ "step": 40450
+ },
+ {
+ "epoch": 0.20895568591638677,
+ "grad_norm": 19528.7890625,
+ "learning_rate": 9.511646525521585e-05,
+ "loss": 0.4801,
+ "step": 40500
+ },
+ {
+ "epoch": 0.2092136558989996,
+ "grad_norm": 18037.7890625,
+ "learning_rate": 9.510070699204597e-05,
+ "loss": 0.483,
+ "step": 40550
+ },
+ {
+ "epoch": 0.20947162588161242,
+ "grad_norm": 20636.4296875,
+ "learning_rate": 9.508492465499199e-05,
+ "loss": 0.4761,
+ "step": 40600
+ },
+ {
+ "epoch": 0.20972959586422524,
+ "grad_norm": 20096.857421875,
+ "learning_rate": 9.506911825247827e-05,
+ "loss": 0.4804,
+ "step": 40650
+ },
+ {
+ "epoch": 0.20998756584683806,
+ "grad_norm": 20855.619140625,
+ "learning_rate": 9.505328779294192e-05,
+ "loss": 0.4823,
+ "step": 40700
+ },
+ {
+ "epoch": 0.21024553582945088,
+ "grad_norm": 19640.521484375,
+ "learning_rate": 9.503743328483296e-05,
+ "loss": 0.4818,
+ "step": 40750
+ },
+ {
+ "epoch": 0.2105035058120637,
+ "grad_norm": 20990.525390625,
+ "learning_rate": 9.50215547366142e-05,
+ "loss": 0.4804,
+ "step": 40800
+ },
+ {
+ "epoch": 0.21076147579467652,
+ "grad_norm": 18773.564453125,
+ "learning_rate": 9.500565215676132e-05,
+ "loss": 0.4798,
+ "step": 40850
+ },
+ {
+ "epoch": 0.21101944577728934,
+ "grad_norm": 18688.7265625,
+ "learning_rate": 9.498972555376282e-05,
+ "loss": 0.4773,
+ "step": 40900
+ },
+ {
+ "epoch": 0.21127741575990217,
+ "grad_norm": 22649.3671875,
+ "learning_rate": 9.497377493611998e-05,
+ "loss": 0.478,
+ "step": 40950
+ },
+ {
+ "epoch": 0.21153538574251501,
+ "grad_norm": 19575.95703125,
+ "learning_rate": 9.495780031234694e-05,
+ "loss": 0.4809,
+ "step": 41000
+ },
+ {
+ "epoch": 0.21179335572512784,
+ "grad_norm": 18587.681640625,
+ "learning_rate": 9.494180169097067e-05,
+ "loss": 0.4805,
+ "step": 41050
+ },
+ {
+ "epoch": 0.21205132570774066,
+ "grad_norm": 19466.5703125,
+ "learning_rate": 9.492577908053089e-05,
+ "loss": 0.4772,
+ "step": 41100
+ },
+ {
+ "epoch": 0.21230929569035348,
+ "grad_norm": 21085.15234375,
+ "learning_rate": 9.490973248958018e-05,
+ "loss": 0.4787,
+ "step": 41150
+ },
+ {
+ "epoch": 0.2125672656729663,
+ "grad_norm": 21866.95703125,
+ "learning_rate": 9.489366192668388e-05,
+ "loss": 0.4803,
+ "step": 41200
+ },
+ {
+ "epoch": 0.21282523565557912,
+ "grad_norm": 20759.609375,
+ "learning_rate": 9.487756740042015e-05,
+ "loss": 0.4782,
+ "step": 41250
+ },
+ {
+ "epoch": 0.21308320563819194,
+ "grad_norm": 20565.51171875,
+ "learning_rate": 9.486144891937997e-05,
+ "loss": 0.4765,
+ "step": 41300
+ },
+ {
+ "epoch": 0.21334117562080476,
+ "grad_norm": 21536.017578125,
+ "learning_rate": 9.484530649216705e-05,
+ "loss": 0.4753,
+ "step": 41350
+ },
+ {
+ "epoch": 0.21359914560341758,
+ "grad_norm": 19452.001953125,
+ "learning_rate": 9.482914012739788e-05,
+ "loss": 0.4807,
+ "step": 41400
+ },
+ {
+ "epoch": 0.2138571155860304,
+ "grad_norm": 21220.927734375,
+ "learning_rate": 9.481294983370179e-05,
+ "loss": 0.4803,
+ "step": 41450
+ },
+ {
+ "epoch": 0.21411508556864323,
+ "grad_norm": 18278.884765625,
+ "learning_rate": 9.479673561972082e-05,
+ "loss": 0.4807,
+ "step": 41500
+ },
+ {
+ "epoch": 0.21437305555125605,
+ "grad_norm": 21568.13671875,
+ "learning_rate": 9.478049749410983e-05,
+ "loss": 0.4751,
+ "step": 41550
+ },
+ {
+ "epoch": 0.21463102553386887,
+ "grad_norm": 21004.734375,
+ "learning_rate": 9.47642354655364e-05,
+ "loss": 0.4828,
+ "step": 41600
+ },
+ {
+ "epoch": 0.2148889955164817,
+ "grad_norm": 20709.193359375,
+ "learning_rate": 9.474794954268089e-05,
+ "loss": 0.477,
+ "step": 41650
+ },
+ {
+ "epoch": 0.21514696549909454,
+ "grad_norm": 21408.3671875,
+ "learning_rate": 9.47316397342364e-05,
+ "loss": 0.4783,
+ "step": 41700
+ },
+ {
+ "epoch": 0.21540493548170736,
+ "grad_norm": 18606.6328125,
+ "learning_rate": 9.47153060489088e-05,
+ "loss": 0.4771,
+ "step": 41750
+ },
+ {
+ "epoch": 0.21566290546432018,
+ "grad_norm": 19498.20703125,
+ "learning_rate": 9.469894849541667e-05,
+ "loss": 0.4782,
+ "step": 41800
+ },
+ {
+ "epoch": 0.215920875446933,
+ "grad_norm": 20441.9765625,
+ "learning_rate": 9.46825670824914e-05,
+ "loss": 0.4769,
+ "step": 41850
+ },
+ {
+ "epoch": 0.21617884542954582,
+ "grad_norm": 20925.109375,
+ "learning_rate": 9.466616181887704e-05,
+ "loss": 0.4858,
+ "step": 41900
+ },
+ {
+ "epoch": 0.21643681541215865,
+ "grad_norm": 21410.38671875,
+ "learning_rate": 9.464973271333042e-05,
+ "loss": 0.4791,
+ "step": 41950
+ },
+ {
+ "epoch": 0.21669478539477147,
+ "grad_norm": 19169.583984375,
+ "learning_rate": 9.463327977462106e-05,
+ "loss": 0.4783,
+ "step": 42000
+ },
+ {
+ "epoch": 0.2169527553773843,
+ "grad_norm": 19487.3359375,
+ "learning_rate": 9.461680301153124e-05,
+ "loss": 0.4792,
+ "step": 42050
+ },
+ {
+ "epoch": 0.2172107253599971,
+ "grad_norm": 21303.861328125,
+ "learning_rate": 9.460030243285592e-05,
+ "loss": 0.4811,
+ "step": 42100
+ },
+ {
+ "epoch": 0.21746869534260993,
+ "grad_norm": 21529.490234375,
+ "learning_rate": 9.458377804740279e-05,
+ "loss": 0.4761,
+ "step": 42150
+ },
+ {
+ "epoch": 0.21772666532522275,
+ "grad_norm": 21356.505859375,
+ "learning_rate": 9.456722986399227e-05,
+ "loss": 0.477,
+ "step": 42200
+ },
+ {
+ "epoch": 0.21798463530783557,
+ "grad_norm": 19551.33203125,
+ "learning_rate": 9.455065789145742e-05,
+ "loss": 0.4777,
+ "step": 42250
+ },
+ {
+ "epoch": 0.2182426052904484,
+ "grad_norm": 21424.58984375,
+ "learning_rate": 9.453406213864408e-05,
+ "loss": 0.4759,
+ "step": 42300
+ },
+ {
+ "epoch": 0.21850057527306121,
+ "grad_norm": 18835.1953125,
+ "learning_rate": 9.451744261441072e-05,
+ "loss": 0.4749,
+ "step": 42350
+ },
+ {
+ "epoch": 0.21875854525567404,
+ "grad_norm": 20333.490234375,
+ "learning_rate": 9.450079932762852e-05,
+ "loss": 0.4786,
+ "step": 42400
+ },
+ {
+ "epoch": 0.21901651523828689,
+ "grad_norm": 18957.232421875,
+ "learning_rate": 9.448413228718134e-05,
+ "loss": 0.4778,
+ "step": 42450
+ },
+ {
+ "epoch": 0.2192744852208997,
+ "grad_norm": 20251.939453125,
+ "learning_rate": 9.446744150196574e-05,
+ "loss": 0.4759,
+ "step": 42500
+ },
+ {
+ "epoch": 0.21953245520351253,
+ "grad_norm": 20740.82421875,
+ "learning_rate": 9.445072698089091e-05,
+ "loss": 0.4782,
+ "step": 42550
+ },
+ {
+ "epoch": 0.21979042518612535,
+ "grad_norm": 19501.91015625,
+ "learning_rate": 9.443398873287877e-05,
+ "loss": 0.479,
+ "step": 42600
+ },
+ {
+ "epoch": 0.22004839516873817,
+ "grad_norm": 20895.58984375,
+ "learning_rate": 9.441722676686386e-05,
+ "loss": 0.4754,
+ "step": 42650
+ },
+ {
+ "epoch": 0.220306365151351,
+ "grad_norm": 19932.66796875,
+ "learning_rate": 9.440044109179338e-05,
+ "loss": 0.4778,
+ "step": 42700
+ },
+ {
+ "epoch": 0.2205643351339638,
+ "grad_norm": 20158.693359375,
+ "learning_rate": 9.438363171662722e-05,
+ "loss": 0.4755,
+ "step": 42750
+ },
+ {
+ "epoch": 0.22082230511657663,
+ "grad_norm": 19128.953125,
+ "learning_rate": 9.436679865033789e-05,
+ "loss": 0.4744,
+ "step": 42800
+ },
+ {
+ "epoch": 0.22108027509918945,
+ "grad_norm": 19743.517578125,
+ "learning_rate": 9.434994190191054e-05,
+ "loss": 0.4781,
+ "step": 42850
+ },
+ {
+ "epoch": 0.22133824508180228,
+ "grad_norm": 17826.703125,
+ "learning_rate": 9.4333061480343e-05,
+ "loss": 0.4762,
+ "step": 42900
+ },
+ {
+ "epoch": 0.2215962150644151,
+ "grad_norm": 20606.48046875,
+ "learning_rate": 9.43161573946457e-05,
+ "loss": 0.4741,
+ "step": 42950
+ },
+ {
+ "epoch": 0.22185418504702792,
+ "grad_norm": 20116.66796875,
+ "learning_rate": 9.429922965384172e-05,
+ "loss": 0.4766,
+ "step": 43000
+ },
+ {
+ "epoch": 0.22211215502964074,
+ "grad_norm": 20560.970703125,
+ "learning_rate": 9.428227826696674e-05,
+ "loss": 0.481,
+ "step": 43050
+ },
+ {
+ "epoch": 0.22237012501225356,
+ "grad_norm": 20832.01953125,
+ "learning_rate": 9.42653032430691e-05,
+ "loss": 0.4806,
+ "step": 43100
+ },
+ {
+ "epoch": 0.2226280949948664,
+ "grad_norm": 18686.953125,
+ "learning_rate": 9.424830459120974e-05,
+ "loss": 0.4796,
+ "step": 43150
+ },
+ {
+ "epoch": 0.22288606497747923,
+ "grad_norm": 21061.240234375,
+ "learning_rate": 9.423128232046223e-05,
+ "loss": 0.474,
+ "step": 43200
+ },
+ {
+ "epoch": 0.22314403496009205,
+ "grad_norm": 21862.25,
+ "learning_rate": 9.421423643991267e-05,
+ "loss": 0.4721,
+ "step": 43250
+ },
+ {
+ "epoch": 0.22340200494270487,
+ "grad_norm": 18299.23828125,
+ "learning_rate": 9.419716695865988e-05,
+ "loss": 0.4744,
+ "step": 43300
+ },
+ {
+ "epoch": 0.2236599749253177,
+ "grad_norm": 20387.876953125,
+ "learning_rate": 9.418007388581517e-05,
+ "loss": 0.4748,
+ "step": 43350
+ },
+ {
+ "epoch": 0.22391794490793052,
+ "grad_norm": 21721.740234375,
+ "learning_rate": 9.416295723050254e-05,
+ "loss": 0.4782,
+ "step": 43400
+ },
+ {
+ "epoch": 0.22417591489054334,
+ "grad_norm": 20274.72265625,
+ "learning_rate": 9.414581700185851e-05,
+ "loss": 0.4734,
+ "step": 43450
+ },
+ {
+ "epoch": 0.22443388487315616,
+ "grad_norm": 22443.296875,
+ "learning_rate": 9.41286532090322e-05,
+ "loss": 0.4734,
+ "step": 43500
+ },
+ {
+ "epoch": 0.22469185485576898,
+ "grad_norm": 19874.8203125,
+ "learning_rate": 9.411146586118529e-05,
+ "loss": 0.4755,
+ "step": 43550
+ },
+ {
+ "epoch": 0.2249498248383818,
+ "grad_norm": 20362.3125,
+ "learning_rate": 9.409425496749209e-05,
+ "loss": 0.4776,
+ "step": 43600
+ },
+ {
+ "epoch": 0.22520779482099462,
+ "grad_norm": 22146.5078125,
+ "learning_rate": 9.40770205371394e-05,
+ "loss": 0.4784,
+ "step": 43650
+ },
+ {
+ "epoch": 0.22546576480360744,
+ "grad_norm": 19917.83203125,
+ "learning_rate": 9.405976257932667e-05,
+ "loss": 0.4744,
+ "step": 43700
+ },
+ {
+ "epoch": 0.22572373478622026,
+ "grad_norm": 19296.904296875,
+ "learning_rate": 9.404248110326583e-05,
+ "loss": 0.4766,
+ "step": 43750
+ },
+ {
+ "epoch": 0.22598170476883309,
+ "grad_norm": 20648.35546875,
+ "learning_rate": 9.402517611818142e-05,
+ "loss": 0.4801,
+ "step": 43800
+ },
+ {
+ "epoch": 0.22623967475144593,
+ "grad_norm": 21750.517578125,
+ "learning_rate": 9.40078476333105e-05,
+ "loss": 0.4752,
+ "step": 43850
+ },
+ {
+ "epoch": 0.22649764473405876,
+ "grad_norm": 21233.337890625,
+ "learning_rate": 9.399049565790266e-05,
+ "loss": 0.4758,
+ "step": 43900
+ },
+ {
+ "epoch": 0.22675561471667158,
+ "grad_norm": 21952.6796875,
+ "learning_rate": 9.397312020122006e-05,
+ "loss": 0.4755,
+ "step": 43950
+ },
+ {
+ "epoch": 0.2270135846992844,
+ "grad_norm": 18598.826171875,
+ "learning_rate": 9.39557212725374e-05,
+ "loss": 0.4725,
+ "step": 44000
+ },
+ {
+ "epoch": 0.22727155468189722,
+ "grad_norm": 20325.51171875,
+ "learning_rate": 9.393829888114188e-05,
+ "loss": 0.4789,
+ "step": 44050
+ },
+ {
+ "epoch": 0.22752952466451004,
+ "grad_norm": 17499.228515625,
+ "learning_rate": 9.392085303633323e-05,
+ "loss": 0.4738,
+ "step": 44100
+ },
+ {
+ "epoch": 0.22778749464712286,
+ "grad_norm": 21283.970703125,
+ "learning_rate": 9.39033837474237e-05,
+ "loss": 0.4743,
+ "step": 44150
+ },
+ {
+ "epoch": 0.22804546462973568,
+ "grad_norm": 19672.765625,
+ "learning_rate": 9.388589102373807e-05,
+ "loss": 0.4751,
+ "step": 44200
+ },
+ {
+ "epoch": 0.2283034346123485,
+ "grad_norm": 19722.314453125,
+ "learning_rate": 9.386837487461361e-05,
+ "loss": 0.4767,
+ "step": 44250
+ },
+ {
+ "epoch": 0.22856140459496133,
+ "grad_norm": 19948.154296875,
+ "learning_rate": 9.38508353094001e-05,
+ "loss": 0.4765,
+ "step": 44300
+ },
+ {
+ "epoch": 0.22881937457757415,
+ "grad_norm": 19880.611328125,
+ "learning_rate": 9.383327233745984e-05,
+ "loss": 0.4754,
+ "step": 44350
+ },
+ {
+ "epoch": 0.22907734456018697,
+ "grad_norm": 20052.91796875,
+ "learning_rate": 9.381568596816757e-05,
+ "loss": 0.4801,
+ "step": 44400
+ },
+ {
+ "epoch": 0.2293353145427998,
+ "grad_norm": 23129.869140625,
+ "learning_rate": 9.379807621091057e-05,
+ "loss": 0.4713,
+ "step": 44450
+ },
+ {
+ "epoch": 0.2295932845254126,
+ "grad_norm": 19922.0703125,
+ "learning_rate": 9.37804430750886e-05,
+ "loss": 0.4736,
+ "step": 44500
+ },
+ {
+ "epoch": 0.22985125450802546,
+ "grad_norm": 19704.24609375,
+ "learning_rate": 9.376278657011388e-05,
+ "loss": 0.4682,
+ "step": 44550
+ },
+ {
+ "epoch": 0.23010922449063828,
+ "grad_norm": 19080.125,
+ "learning_rate": 9.374510670541109e-05,
+ "loss": 0.4751,
+ "step": 44600
+ },
+ {
+ "epoch": 0.2303671944732511,
+ "grad_norm": 20858.388671875,
+ "learning_rate": 9.372740349041742e-05,
+ "loss": 0.4734,
+ "step": 44650
+ },
+ {
+ "epoch": 0.23062516445586392,
+ "grad_norm": 22074.056640625,
+ "learning_rate": 9.37096769345825e-05,
+ "loss": 0.4699,
+ "step": 44700
+ },
+ {
+ "epoch": 0.23088313443847674,
+ "grad_norm": 21852.623046875,
+ "learning_rate": 9.369192704736842e-05,
+ "loss": 0.47,
+ "step": 44750
+ },
+ {
+ "epoch": 0.23114110442108957,
+ "grad_norm": 20904.033203125,
+ "learning_rate": 9.367415383824974e-05,
+ "loss": 0.4736,
+ "step": 44800
+ },
+ {
+ "epoch": 0.2313990744037024,
+ "grad_norm": 18965.021484375,
+ "learning_rate": 9.365635731671343e-05,
+ "loss": 0.4687,
+ "step": 44850
+ },
+ {
+ "epoch": 0.2316570443863152,
+ "grad_norm": 16994.271484375,
+ "learning_rate": 9.363853749225894e-05,
+ "loss": 0.4747,
+ "step": 44900
+ },
+ {
+ "epoch": 0.23191501436892803,
+ "grad_norm": 19191.794921875,
+ "learning_rate": 9.362069437439814e-05,
+ "loss": 0.4689,
+ "step": 44950
+ },
+ {
+ "epoch": 0.23217298435154085,
+ "grad_norm": 19691.982421875,
+ "learning_rate": 9.360282797265537e-05,
+ "loss": 0.4683,
+ "step": 45000
+ },
+ {
+ "epoch": 0.23217298435154085,
+ "eval_loss": 0.4633353352546692,
+ "eval_runtime": 3256.5731,
+ "eval_samples_per_second": 952.265,
+ "eval_steps_per_second": 1.86,
+ "step": 45000
+ },
+ {
+ "epoch": 0.23243095433415367,
+ "grad_norm": 21778.20703125,
+ "learning_rate": 9.358493829656732e-05,
+ "loss": 0.4726,
+ "step": 45050
+ },
+ {
+ "epoch": 0.2326889243167665,
+ "grad_norm": 20281.802734375,
+ "learning_rate": 9.35670253556832e-05,
+ "loss": 0.4752,
+ "step": 45100
+ },
+ {
+ "epoch": 0.23294689429937931,
+ "grad_norm": 20620.580078125,
+ "learning_rate": 9.354908915956456e-05,
+ "loss": 0.474,
+ "step": 45150
+ },
+ {
+ "epoch": 0.23320486428199214,
+ "grad_norm": 21115.86328125,
+ "learning_rate": 9.353112971778542e-05,
+ "loss": 0.4763,
+ "step": 45200
+ },
+ {
+ "epoch": 0.23346283426460496,
+ "grad_norm": 19746.30859375,
+ "learning_rate": 9.351314703993215e-05,
+ "loss": 0.4792,
+ "step": 45250
+ },
+ {
+ "epoch": 0.2337208042472178,
+ "grad_norm": 21270.26171875,
+ "learning_rate": 9.349514113560358e-05,
+ "loss": 0.4726,
+ "step": 45300
+ },
+ {
+ "epoch": 0.23397877422983063,
+ "grad_norm": 20273.658203125,
+ "learning_rate": 9.347711201441092e-05,
+ "loss": 0.4683,
+ "step": 45350
+ },
+ {
+ "epoch": 0.23423674421244345,
+ "grad_norm": 19746.9609375,
+ "learning_rate": 9.345905968597773e-05,
+ "loss": 0.4778,
+ "step": 45400
+ },
+ {
+ "epoch": 0.23449471419505627,
+ "grad_norm": 22999.52734375,
+ "learning_rate": 9.344098415994003e-05,
+ "loss": 0.4799,
+ "step": 45450
+ },
+ {
+ "epoch": 0.2347526841776691,
+ "grad_norm": 19922.41015625,
+ "learning_rate": 9.342288544594617e-05,
+ "loss": 0.4773,
+ "step": 45500
+ },
+ {
+ "epoch": 0.2350106541602819,
+ "grad_norm": 19793.73828125,
+ "learning_rate": 9.340476355365688e-05,
+ "loss": 0.4743,
+ "step": 45550
+ },
+ {
+ "epoch": 0.23526862414289473,
+ "grad_norm": 19525.74609375,
+ "learning_rate": 9.33866184927453e-05,
+ "loss": 0.4729,
+ "step": 45600
+ },
+ {
+ "epoch": 0.23552659412550755,
+ "grad_norm": 26093.65625,
+ "learning_rate": 9.336845027289691e-05,
+ "loss": 0.4767,
+ "step": 45650
+ },
+ {
+ "epoch": 0.23578456410812038,
+ "grad_norm": 20045.16796875,
+ "learning_rate": 9.335025890380953e-05,
+ "loss": 0.4768,
+ "step": 45700
+ },
+ {
+ "epoch": 0.2360425340907332,
+ "grad_norm": 21272.36328125,
+ "learning_rate": 9.333204439519338e-05,
+ "loss": 0.4738,
+ "step": 45750
+ },
+ {
+ "epoch": 0.23630050407334602,
+ "grad_norm": 19174.44921875,
+ "learning_rate": 9.3313806756771e-05,
+ "loss": 0.4752,
+ "step": 45800
+ },
+ {
+ "epoch": 0.23655847405595884,
+ "grad_norm": 18446.640625,
+ "learning_rate": 9.32955459982773e-05,
+ "loss": 0.4747,
+ "step": 45850
+ },
+ {
+ "epoch": 0.23681644403857166,
+ "grad_norm": 23397.7109375,
+ "learning_rate": 9.327726212945953e-05,
+ "loss": 0.4723,
+ "step": 45900
+ },
+ {
+ "epoch": 0.23707441402118448,
+ "grad_norm": 20350.755859375,
+ "learning_rate": 9.325895516007725e-05,
+ "loss": 0.4671,
+ "step": 45950
+ },
+ {
+ "epoch": 0.23733238400379733,
+ "grad_norm": 21147.5546875,
+ "learning_rate": 9.324062509990235e-05,
+ "loss": 0.4689,
+ "step": 46000
+ },
+ {
+ "epoch": 0.23759035398641015,
+ "grad_norm": 19813.130859375,
+ "learning_rate": 9.322227195871909e-05,
+ "loss": 0.4723,
+ "step": 46050
+ },
+ {
+ "epoch": 0.23784832396902297,
+ "grad_norm": 22310.037109375,
+ "learning_rate": 9.320389574632399e-05,
+ "loss": 0.4727,
+ "step": 46100
+ },
+ {
+ "epoch": 0.2381062939516358,
+ "grad_norm": 19646.509765625,
+ "learning_rate": 9.318549647252596e-05,
+ "loss": 0.4723,
+ "step": 46150
+ },
+ {
+ "epoch": 0.23836426393424862,
+ "grad_norm": 20145.29296875,
+ "learning_rate": 9.316707414714614e-05,
+ "loss": 0.4652,
+ "step": 46200
+ },
+ {
+ "epoch": 0.23862223391686144,
+ "grad_norm": 19513.466796875,
+ "learning_rate": 9.314862878001803e-05,
+ "loss": 0.4774,
+ "step": 46250
+ },
+ {
+ "epoch": 0.23888020389947426,
+ "grad_norm": 20701.25390625,
+ "learning_rate": 9.313016038098739e-05,
+ "loss": 0.4721,
+ "step": 46300
+ },
+ {
+ "epoch": 0.23913817388208708,
+ "grad_norm": 18766.328125,
+ "learning_rate": 9.31116689599123e-05,
+ "loss": 0.4691,
+ "step": 46350
+ },
+ {
+ "epoch": 0.2393961438646999,
+ "grad_norm": 20925.5,
+ "learning_rate": 9.309315452666314e-05,
+ "loss": 0.4743,
+ "step": 46400
+ },
+ {
+ "epoch": 0.23965411384731272,
+ "grad_norm": 19413.0703125,
+ "learning_rate": 9.307461709112253e-05,
+ "loss": 0.469,
+ "step": 46450
+ },
+ {
+ "epoch": 0.23991208382992554,
+ "grad_norm": 18517.669921875,
+ "learning_rate": 9.305605666318543e-05,
+ "loss": 0.4769,
+ "step": 46500
+ },
+ {
+ "epoch": 0.24017005381253836,
+ "grad_norm": 20222.50390625,
+ "learning_rate": 9.3037473252759e-05,
+ "loss": 0.4701,
+ "step": 46550
+ },
+ {
+ "epoch": 0.24042802379515119,
+ "grad_norm": 21650.63671875,
+ "learning_rate": 9.301886686976272e-05,
+ "loss": 0.4693,
+ "step": 46600
+ },
+ {
+ "epoch": 0.240685993777764,
+ "grad_norm": 18923.498046875,
+ "learning_rate": 9.300023752412832e-05,
+ "loss": 0.4749,
+ "step": 46650
+ },
+ {
+ "epoch": 0.24094396376037686,
+ "grad_norm": 21353.748046875,
+ "learning_rate": 9.298158522579978e-05,
+ "loss": 0.4735,
+ "step": 46700
+ },
+ {
+ "epoch": 0.24120193374298968,
+ "grad_norm": 19405.5234375,
+ "learning_rate": 9.296290998473334e-05,
+ "loss": 0.4708,
+ "step": 46750
+ },
+ {
+ "epoch": 0.2414599037256025,
+ "grad_norm": 21692.3203125,
+ "learning_rate": 9.294421181089747e-05,
+ "loss": 0.4644,
+ "step": 46800
+ },
+ {
+ "epoch": 0.24171787370821532,
+ "grad_norm": 18488.671875,
+ "learning_rate": 9.292549071427291e-05,
+ "loss": 0.4668,
+ "step": 46850
+ },
+ {
+ "epoch": 0.24197584369082814,
+ "grad_norm": 21951.712890625,
+ "learning_rate": 9.29067467048526e-05,
+ "loss": 0.4749,
+ "step": 46900
+ },
+ {
+ "epoch": 0.24223381367344096,
+ "grad_norm": 20673.82421875,
+ "learning_rate": 9.288797979264176e-05,
+ "loss": 0.4687,
+ "step": 46950
+ },
+ {
+ "epoch": 0.24249178365605378,
+ "grad_norm": 18687.69140625,
+ "learning_rate": 9.286918998765776e-05,
+ "loss": 0.4731,
+ "step": 47000
+ },
+ {
+ "epoch": 0.2427497536386666,
+ "grad_norm": 18882.009765625,
+ "learning_rate": 9.285037729993027e-05,
+ "loss": 0.4699,
+ "step": 47050
+ },
+ {
+ "epoch": 0.24300772362127943,
+ "grad_norm": 22378.685546875,
+ "learning_rate": 9.283154173950112e-05,
+ "loss": 0.4678,
+ "step": 47100
+ },
+ {
+ "epoch": 0.24326569360389225,
+ "grad_norm": 19457.736328125,
+ "learning_rate": 9.281268331642439e-05,
+ "loss": 0.4665,
+ "step": 47150
+ },
+ {
+ "epoch": 0.24352366358650507,
+ "grad_norm": 19794.4296875,
+ "learning_rate": 9.279380204076631e-05,
+ "loss": 0.4683,
+ "step": 47200
+ },
+ {
+ "epoch": 0.2437816335691179,
+ "grad_norm": 18910.41796875,
+ "learning_rate": 9.277489792260536e-05,
+ "loss": 0.4683,
+ "step": 47250
+ },
+ {
+ "epoch": 0.2440396035517307,
+ "grad_norm": 21774.009765625,
+ "learning_rate": 9.275597097203216e-05,
+ "loss": 0.4729,
+ "step": 47300
+ },
+ {
+ "epoch": 0.24429757353434353,
+ "grad_norm": 21403.1796875,
+ "learning_rate": 9.273702119914962e-05,
+ "loss": 0.4681,
+ "step": 47350
+ },
+ {
+ "epoch": 0.24455554351695638,
+ "grad_norm": 20333.400390625,
+ "learning_rate": 9.271804861407269e-05,
+ "loss": 0.4713,
+ "step": 47400
+ },
+ {
+ "epoch": 0.2448135134995692,
+ "grad_norm": 22196.32421875,
+ "learning_rate": 9.269905322692862e-05,
+ "loss": 0.468,
+ "step": 47450
+ },
+ {
+ "epoch": 0.24507148348218202,
+ "grad_norm": 18356.623046875,
+ "learning_rate": 9.268003504785673e-05,
+ "loss": 0.4663,
+ "step": 47500
+ },
+ {
+ "epoch": 0.24532945346479484,
+ "grad_norm": 20337.546875,
+ "learning_rate": 9.266099408700859e-05,
+ "loss": 0.4657,
+ "step": 47550
+ },
+ {
+ "epoch": 0.24558742344740767,
+ "grad_norm": 20426.03515625,
+ "learning_rate": 9.264193035454789e-05,
+ "loss": 0.4677,
+ "step": 47600
+ },
+ {
+ "epoch": 0.2458453934300205,
+ "grad_norm": 20962.81640625,
+ "learning_rate": 9.262284386065047e-05,
+ "loss": 0.4759,
+ "step": 47650
+ },
+ {
+ "epoch": 0.2461033634126333,
+ "grad_norm": 20498.919921875,
+ "learning_rate": 9.260373461550435e-05,
+ "loss": 0.4647,
+ "step": 47700
+ },
+ {
+ "epoch": 0.24636133339524613,
+ "grad_norm": 21223.171875,
+ "learning_rate": 9.258460262930967e-05,
+ "loss": 0.4698,
+ "step": 47750
+ },
+ {
+ "epoch": 0.24661930337785895,
+ "grad_norm": 21146.671875,
+ "learning_rate": 9.256544791227871e-05,
+ "loss": 0.4727,
+ "step": 47800
+ },
+ {
+ "epoch": 0.24687727336047177,
+ "grad_norm": 19261.603515625,
+ "learning_rate": 9.254627047463588e-05,
+ "loss": 0.4734,
+ "step": 47850
+ },
+ {
+ "epoch": 0.2471352433430846,
+ "grad_norm": 21131.298828125,
+ "learning_rate": 9.252707032661774e-05,
+ "loss": 0.4686,
+ "step": 47900
+ },
+ {
+ "epoch": 0.24739321332569741,
+ "grad_norm": 22491.212890625,
+ "learning_rate": 9.250784747847294e-05,
+ "loss": 0.4701,
+ "step": 47950
+ },
+ {
+ "epoch": 0.24765118330831024,
+ "grad_norm": 20198.486328125,
+ "learning_rate": 9.248860194046228e-05,
+ "loss": 0.4657,
+ "step": 48000
+ },
+ {
+ "epoch": 0.24790915329092306,
+ "grad_norm": 21754.078125,
+ "learning_rate": 9.246933372285863e-05,
+ "loss": 0.4674,
+ "step": 48050
+ },
+ {
+ "epoch": 0.24816712327353588,
+ "grad_norm": 20948.244140625,
+ "learning_rate": 9.245004283594703e-05,
+ "loss": 0.4604,
+ "step": 48100
+ },
+ {
+ "epoch": 0.24842509325614873,
+ "grad_norm": 20916.3671875,
+ "learning_rate": 9.243072929002454e-05,
+ "loss": 0.4656,
+ "step": 48150
+ },
+ {
+ "epoch": 0.24868306323876155,
+ "grad_norm": 19935.021484375,
+ "learning_rate": 9.24113930954004e-05,
+ "loss": 0.4735,
+ "step": 48200
+ },
+ {
+ "epoch": 0.24894103322137437,
+ "grad_norm": 20075.96875,
+ "learning_rate": 9.239203426239585e-05,
+ "loss": 0.4679,
+ "step": 48250
+ },
+ {
+ "epoch": 0.2491990032039872,
+ "grad_norm": 20107.943359375,
+ "learning_rate": 9.23726528013443e-05,
+ "loss": 0.4773,
+ "step": 48300
+ },
+ {
+ "epoch": 0.2494569731866,
+ "grad_norm": 20341.1171875,
+ "learning_rate": 9.235324872259119e-05,
+ "loss": 0.4699,
+ "step": 48350
+ },
+ {
+ "epoch": 0.24971494316921283,
+ "grad_norm": 21787.4296875,
+ "learning_rate": 9.233382203649401e-05,
+ "loss": 0.4665,
+ "step": 48400
+ },
+ {
+ "epoch": 0.24997291315182565,
+ "grad_norm": 17707.583984375,
+ "learning_rate": 9.231437275342239e-05,
+ "loss": 0.4678,
+ "step": 48450
+ },
+ {
+ "epoch": 0.2502308831344385,
+ "grad_norm": 24467.810546875,
+ "learning_rate": 9.229490088375797e-05,
+ "loss": 0.466,
+ "step": 48500
+ },
+ {
+ "epoch": 0.2504888531170513,
+ "grad_norm": 20794.73828125,
+ "learning_rate": 9.227540643789446e-05,
+ "loss": 0.4711,
+ "step": 48550
+ },
+ {
+ "epoch": 0.2507468230996641,
+ "grad_norm": 20147.099609375,
+ "learning_rate": 9.225588942623758e-05,
+ "loss": 0.4689,
+ "step": 48600
+ },
+ {
+ "epoch": 0.25100479308227697,
+ "grad_norm": 20704.037109375,
+ "learning_rate": 9.223634985920517e-05,
+ "loss": 0.4687,
+ "step": 48650
+ },
+ {
+ "epoch": 0.25126276306488976,
+ "grad_norm": 19472.21875,
+ "learning_rate": 9.221678774722707e-05,
+ "loss": 0.4636,
+ "step": 48700
+ },
+ {
+ "epoch": 0.2515207330475026,
+ "grad_norm": 21352.755859375,
+ "learning_rate": 9.219720310074515e-05,
+ "loss": 0.4671,
+ "step": 48750
+ },
+ {
+ "epoch": 0.2517787030301154,
+ "grad_norm": 20956.146484375,
+ "learning_rate": 9.21775959302133e-05,
+ "loss": 0.4703,
+ "step": 48800
+ },
+ {
+ "epoch": 0.25203667301272825,
+ "grad_norm": 26295.541015625,
+ "learning_rate": 9.215796624609749e-05,
+ "loss": 0.4742,
+ "step": 48850
+ },
+ {
+ "epoch": 0.25229464299534105,
+ "grad_norm": 19862.15625,
+ "learning_rate": 9.213831405887564e-05,
+ "loss": 0.468,
+ "step": 48900
+ },
+ {
+ "epoch": 0.2525526129779539,
+ "grad_norm": 21760.404296875,
+ "learning_rate": 9.211863937903769e-05,
+ "loss": 0.4728,
+ "step": 48950
+ },
+ {
+ "epoch": 0.2528105829605667,
+ "grad_norm": 22488.1484375,
+ "learning_rate": 9.209894221708564e-05,
+ "loss": 0.4627,
+ "step": 49000
+ },
+ {
+ "epoch": 0.25306855294317954,
+ "grad_norm": 20244.5,
+ "learning_rate": 9.20792225835334e-05,
+ "loss": 0.4706,
+ "step": 49050
+ },
+ {
+ "epoch": 0.25332652292579233,
+ "grad_norm": 22642.44140625,
+ "learning_rate": 9.205948048890698e-05,
+ "loss": 0.4708,
+ "step": 49100
+ },
+ {
+ "epoch": 0.2535844929084052,
+ "grad_norm": 23121.501953125,
+ "learning_rate": 9.203971594374432e-05,
+ "loss": 0.4723,
+ "step": 49150
+ },
+ {
+ "epoch": 0.25384246289101803,
+ "grad_norm": 19514.916015625,
+ "learning_rate": 9.201992895859532e-05,
+ "loss": 0.4692,
+ "step": 49200
+ },
+ {
+ "epoch": 0.2541004328736308,
+ "grad_norm": 19467.662109375,
+ "learning_rate": 9.200011954402193e-05,
+ "loss": 0.4719,
+ "step": 49250
+ },
+ {
+ "epoch": 0.25435840285624367,
+ "grad_norm": 20737.7578125,
+ "learning_rate": 9.198028771059799e-05,
+ "loss": 0.4643,
+ "step": 49300
+ },
+ {
+ "epoch": 0.25461637283885646,
+ "grad_norm": 20229.341796875,
+ "learning_rate": 9.196043346890939e-05,
+ "loss": 0.462,
+ "step": 49350
+ },
+ {
+ "epoch": 0.2548743428214693,
+ "grad_norm": 23094.35546875,
+ "learning_rate": 9.194055682955392e-05,
+ "loss": 0.4701,
+ "step": 49400
+ },
+ {
+ "epoch": 0.2551323128040821,
+ "grad_norm": 21099.541015625,
+ "learning_rate": 9.192065780314132e-05,
+ "loss": 0.466,
+ "step": 49450
+ },
+ {
+ "epoch": 0.25539028278669496,
+ "grad_norm": 21500.302734375,
+ "learning_rate": 9.190073640029335e-05,
+ "loss": 0.4703,
+ "step": 49500
+ },
+ {
+ "epoch": 0.25564825276930775,
+ "grad_norm": 24272.228515625,
+ "learning_rate": 9.188079263164366e-05,
+ "loss": 0.4672,
+ "step": 49550
+ },
+ {
+ "epoch": 0.2559062227519206,
+ "grad_norm": 21129.013671875,
+ "learning_rate": 9.186082650783783e-05,
+ "loss": 0.4715,
+ "step": 49600
+ },
+ {
+ "epoch": 0.2561641927345334,
+ "grad_norm": 20696.32421875,
+ "learning_rate": 9.184083803953339e-05,
+ "loss": 0.4646,
+ "step": 49650
+ },
+ {
+ "epoch": 0.25642216271714624,
+ "grad_norm": 20142.7890625,
+ "learning_rate": 9.18208272373998e-05,
+ "loss": 0.4627,
+ "step": 49700
+ },
+ {
+ "epoch": 0.25668013269975903,
+ "grad_norm": 18810.43359375,
+ "learning_rate": 9.180079411211847e-05,
+ "loss": 0.4659,
+ "step": 49750
+ },
+ {
+ "epoch": 0.2569381026823719,
+ "grad_norm": 23121.84765625,
+ "learning_rate": 9.178073867438264e-05,
+ "loss": 0.4683,
+ "step": 49800
+ },
+ {
+ "epoch": 0.2571960726649847,
+ "grad_norm": 20432.021484375,
+ "learning_rate": 9.176066093489755e-05,
+ "loss": 0.4704,
+ "step": 49850
+ },
+ {
+ "epoch": 0.2574540426475975,
+ "grad_norm": 22056.09765625,
+ "learning_rate": 9.17405609043803e-05,
+ "loss": 0.4753,
+ "step": 49900
+ },
+ {
+ "epoch": 0.2577120126302104,
+ "grad_norm": 21094.931640625,
+ "learning_rate": 9.17204385935599e-05,
+ "loss": 0.4648,
+ "step": 49950
+ },
+ {
+ "epoch": 0.25796998261282317,
+ "grad_norm": 20127.525390625,
+ "learning_rate": 9.170029401317725e-05,
+ "loss": 0.4646,
+ "step": 50000
+ },
+ {
+ "epoch": 0.25796998261282317,
+ "eval_loss": 0.4567689299583435,
+ "eval_runtime": 3268.0543,
+ "eval_samples_per_second": 948.919,
+ "eval_steps_per_second": 1.853,
+ "step": 50000
+ },
+ {
+ "epoch": 0.258227952595436,
+ "grad_norm": 20947.306640625,
+ "learning_rate": 9.168012717398516e-05,
+ "loss": 0.4688,
+ "step": 50050
+ },
+ {
+ "epoch": 0.2584859225780488,
+ "grad_norm": 23591.646484375,
+ "learning_rate": 9.165993808674823e-05,
+ "loss": 0.4683,
+ "step": 50100
+ },
+ {
+ "epoch": 0.25874389256066166,
+ "grad_norm": 21227.677734375,
+ "learning_rate": 9.163972676224306e-05,
+ "loss": 0.4671,
+ "step": 50150
+ },
+ {
+ "epoch": 0.25900186254327445,
+ "grad_norm": 20084.953125,
+ "learning_rate": 9.161949321125807e-05,
+ "loss": 0.4598,
+ "step": 50200
+ },
+ {
+ "epoch": 0.2592598325258873,
+ "grad_norm": 21139.5,
+ "learning_rate": 9.159923744459349e-05,
+ "loss": 0.4707,
+ "step": 50250
+ },
+ {
+ "epoch": 0.2595178025085001,
+ "grad_norm": 20410.794921875,
+ "learning_rate": 9.15789594730615e-05,
+ "loss": 0.4675,
+ "step": 50300
+ },
+ {
+ "epoch": 0.25977577249111294,
+ "grad_norm": 20010.328125,
+ "learning_rate": 9.155865930748608e-05,
+ "loss": 0.4599,
+ "step": 50350
+ },
+ {
+ "epoch": 0.26003374247372574,
+ "grad_norm": 23502.890625,
+ "learning_rate": 9.153833695870304e-05,
+ "loss": 0.4664,
+ "step": 50400
+ },
+ {
+ "epoch": 0.2602917124563386,
+ "grad_norm": 20373.498046875,
+ "learning_rate": 9.151799243756008e-05,
+ "loss": 0.4655,
+ "step": 50450
+ },
+ {
+ "epoch": 0.2605496824389514,
+ "grad_norm": 21093.669921875,
+ "learning_rate": 9.149762575491671e-05,
+ "loss": 0.4623,
+ "step": 50500
+ },
+ {
+ "epoch": 0.26080765242156423,
+ "grad_norm": 22206.87890625,
+ "learning_rate": 9.147723692164427e-05,
+ "loss": 0.4687,
+ "step": 50550
+ },
+ {
+ "epoch": 0.261065622404177,
+ "grad_norm": 23264.875,
+ "learning_rate": 9.145682594862593e-05,
+ "loss": 0.4705,
+ "step": 50600
+ },
+ {
+ "epoch": 0.26132359238678987,
+ "grad_norm": 22029.849609375,
+ "learning_rate": 9.143639284675664e-05,
+ "loss": 0.4673,
+ "step": 50650
+ },
+ {
+ "epoch": 0.2615815623694027,
+ "grad_norm": 23016.955078125,
+ "learning_rate": 9.141593762694323e-05,
+ "loss": 0.4663,
+ "step": 50700
+ },
+ {
+ "epoch": 0.2618395323520155,
+ "grad_norm": 21590.80859375,
+ "learning_rate": 9.139546030010427e-05,
+ "loss": 0.4684,
+ "step": 50750
+ },
+ {
+ "epoch": 0.26209750233462836,
+ "grad_norm": 19839.986328125,
+ "learning_rate": 9.13749608771702e-05,
+ "loss": 0.4682,
+ "step": 50800
+ },
+ {
+ "epoch": 0.26235547231724116,
+ "grad_norm": 17922.802734375,
+ "learning_rate": 9.135443936908318e-05,
+ "loss": 0.4601,
+ "step": 50850
+ },
+ {
+ "epoch": 0.262613442299854,
+ "grad_norm": 21141.119140625,
+ "learning_rate": 9.133389578679723e-05,
+ "loss": 0.467,
+ "step": 50900
+ },
+ {
+ "epoch": 0.2628714122824668,
+ "grad_norm": 21858.158203125,
+ "learning_rate": 9.131333014127806e-05,
+ "loss": 0.4663,
+ "step": 50950
+ },
+ {
+ "epoch": 0.26312938226507965,
+ "grad_norm": 21516.46875,
+ "learning_rate": 9.129274244350326e-05,
+ "loss": 0.4656,
+ "step": 51000
+ },
+ {
+ "epoch": 0.26338735224769244,
+ "grad_norm": 21403.263671875,
+ "learning_rate": 9.127213270446213e-05,
+ "loss": 0.4717,
+ "step": 51050
+ },
+ {
+ "epoch": 0.2636453222303053,
+ "grad_norm": 20405.4296875,
+ "learning_rate": 9.125150093515575e-05,
+ "loss": 0.4656,
+ "step": 51100
+ },
+ {
+ "epoch": 0.2639032922129181,
+ "grad_norm": 21057.57421875,
+ "learning_rate": 9.123084714659698e-05,
+ "loss": 0.4655,
+ "step": 51150
+ },
+ {
+ "epoch": 0.26416126219553093,
+ "grad_norm": 19891.15234375,
+ "learning_rate": 9.121017134981036e-05,
+ "loss": 0.4706,
+ "step": 51200
+ },
+ {
+ "epoch": 0.2644192321781437,
+ "grad_norm": 20441.30078125,
+ "learning_rate": 9.118947355583228e-05,
+ "loss": 0.4707,
+ "step": 51250
+ },
+ {
+ "epoch": 0.2646772021607566,
+ "grad_norm": 22182.67578125,
+ "learning_rate": 9.11687537757108e-05,
+ "loss": 0.4633,
+ "step": 51300
+ },
+ {
+ "epoch": 0.2649351721433694,
+ "grad_norm": 18211.728515625,
+ "learning_rate": 9.114801202050574e-05,
+ "loss": 0.4677,
+ "step": 51350
+ },
+ {
+ "epoch": 0.2651931421259822,
+ "grad_norm": 20691.697265625,
+ "learning_rate": 9.112724830128865e-05,
+ "loss": 0.4634,
+ "step": 51400
+ },
+ {
+ "epoch": 0.26545111210859507,
+ "grad_norm": 19717.75390625,
+ "learning_rate": 9.110646262914279e-05,
+ "loss": 0.4647,
+ "step": 51450
+ },
+ {
+ "epoch": 0.26570908209120786,
+ "grad_norm": 19860.55078125,
+ "learning_rate": 9.108565501516318e-05,
+ "loss": 0.4665,
+ "step": 51500
+ },
+ {
+ "epoch": 0.2659670520738207,
+ "grad_norm": 20122.984375,
+ "learning_rate": 9.106482547045648e-05,
+ "loss": 0.4663,
+ "step": 51550
+ },
+ {
+ "epoch": 0.2662250220564335,
+ "grad_norm": 21214.724609375,
+ "learning_rate": 9.104397400614112e-05,
+ "loss": 0.4676,
+ "step": 51600
+ },
+ {
+ "epoch": 0.26648299203904635,
+ "grad_norm": 24545.041015625,
+ "learning_rate": 9.102310063334722e-05,
+ "loss": 0.4705,
+ "step": 51650
+ },
+ {
+ "epoch": 0.26674096202165914,
+ "grad_norm": 22479.380859375,
+ "learning_rate": 9.100220536321655e-05,
+ "loss": 0.4616,
+ "step": 51700
+ },
+ {
+ "epoch": 0.266998932004272,
+ "grad_norm": 20262.27734375,
+ "learning_rate": 9.098128820690264e-05,
+ "loss": 0.4569,
+ "step": 51750
+ },
+ {
+ "epoch": 0.2672569019868848,
+ "grad_norm": 20906.880859375,
+ "learning_rate": 9.096034917557062e-05,
+ "loss": 0.468,
+ "step": 51800
+ },
+ {
+ "epoch": 0.26751487196949764,
+ "grad_norm": 20986.455078125,
+ "learning_rate": 9.093938828039737e-05,
+ "loss": 0.4697,
+ "step": 51850
+ },
+ {
+ "epoch": 0.26777284195211043,
+ "grad_norm": 22425.681640625,
+ "learning_rate": 9.09184055325714e-05,
+ "loss": 0.4692,
+ "step": 51900
+ },
+ {
+ "epoch": 0.2680308119347233,
+ "grad_norm": 21817.744140625,
+ "learning_rate": 9.089740094329288e-05,
+ "loss": 0.4726,
+ "step": 51950
+ },
+ {
+ "epoch": 0.26828878191733607,
+ "grad_norm": 20527.017578125,
+ "learning_rate": 9.087637452377369e-05,
+ "loss": 0.459,
+ "step": 52000
+ },
+ {
+ "epoch": 0.2685467518999489,
+ "grad_norm": 24486.521484375,
+ "learning_rate": 9.08553262852373e-05,
+ "loss": 0.4624,
+ "step": 52050
+ },
+ {
+ "epoch": 0.26880472188256177,
+ "grad_norm": 20964.537109375,
+ "learning_rate": 9.083425623891885e-05,
+ "loss": 0.4657,
+ "step": 52100
+ },
+ {
+ "epoch": 0.26906269186517456,
+ "grad_norm": 20966.478515625,
+ "learning_rate": 9.081316439606513e-05,
+ "loss": 0.4723,
+ "step": 52150
+ },
+ {
+ "epoch": 0.2693206618477874,
+ "grad_norm": 20067.330078125,
+ "learning_rate": 9.079205076793457e-05,
+ "loss": 0.4644,
+ "step": 52200
+ },
+ {
+ "epoch": 0.2695786318304002,
+ "grad_norm": 21526.298828125,
+ "learning_rate": 9.077091536579719e-05,
+ "loss": 0.4602,
+ "step": 52250
+ },
+ {
+ "epoch": 0.26983660181301306,
+ "grad_norm": 20446.767578125,
+ "learning_rate": 9.074975820093468e-05,
+ "loss": 0.4671,
+ "step": 52300
+ },
+ {
+ "epoch": 0.27009457179562585,
+ "grad_norm": 19936.599609375,
+ "learning_rate": 9.072857928464029e-05,
+ "loss": 0.4626,
+ "step": 52350
+ },
+ {
+ "epoch": 0.2703525417782387,
+ "grad_norm": 21716.60546875,
+ "learning_rate": 9.070737862821896e-05,
+ "loss": 0.4642,
+ "step": 52400
+ },
+ {
+ "epoch": 0.2706105117608515,
+ "grad_norm": 17588.40625,
+ "learning_rate": 9.068615624298717e-05,
+ "loss": 0.4595,
+ "step": 52450
+ },
+ {
+ "epoch": 0.27086848174346434,
+ "grad_norm": 21721.138671875,
+ "learning_rate": 9.066491214027302e-05,
+ "loss": 0.4639,
+ "step": 52500
+ },
+ {
+ "epoch": 0.27112645172607713,
+ "grad_norm": 19480.875,
+ "learning_rate": 9.06436463314162e-05,
+ "loss": 0.4654,
+ "step": 52550
+ },
+ {
+ "epoch": 0.27138442170869,
+ "grad_norm": 22658.076171875,
+ "learning_rate": 9.062235882776797e-05,
+ "loss": 0.4653,
+ "step": 52600
+ },
+ {
+ "epoch": 0.2716423916913028,
+ "grad_norm": 22396.4140625,
+ "learning_rate": 9.060104964069121e-05,
+ "loss": 0.4634,
+ "step": 52650
+ },
+ {
+ "epoch": 0.2719003616739156,
+ "grad_norm": 22354.28125,
+ "learning_rate": 9.057971878156036e-05,
+ "loss": 0.4626,
+ "step": 52700
+ },
+ {
+ "epoch": 0.2721583316565285,
+ "grad_norm": 19845.22265625,
+ "learning_rate": 9.05583662617614e-05,
+ "loss": 0.4666,
+ "step": 52750
+ },
+ {
+ "epoch": 0.27241630163914127,
+ "grad_norm": 19933.978515625,
+ "learning_rate": 9.053699209269188e-05,
+ "loss": 0.4601,
+ "step": 52800
+ },
+ {
+ "epoch": 0.2726742716217541,
+ "grad_norm": 21288.86328125,
+ "learning_rate": 9.051559628576094e-05,
+ "loss": 0.4622,
+ "step": 52850
+ },
+ {
+ "epoch": 0.2729322416043669,
+ "grad_norm": 20604.05078125,
+ "learning_rate": 9.049417885238927e-05,
+ "loss": 0.4618,
+ "step": 52900
+ },
+ {
+ "epoch": 0.27319021158697976,
+ "grad_norm": 18641.544921875,
+ "learning_rate": 9.047273980400903e-05,
+ "loss": 0.46,
+ "step": 52950
+ },
+ {
+ "epoch": 0.27344818156959255,
+ "grad_norm": 22482.8125,
+ "learning_rate": 9.045127915206398e-05,
+ "loss": 0.4673,
+ "step": 53000
+ },
+ {
+ "epoch": 0.2737061515522054,
+ "grad_norm": 20967.9375,
+ "learning_rate": 9.042979690800943e-05,
+ "loss": 0.4607,
+ "step": 53050
+ },
+ {
+ "epoch": 0.2739641215348182,
+ "grad_norm": 22371.90234375,
+ "learning_rate": 9.040829308331216e-05,
+ "loss": 0.4624,
+ "step": 53100
+ },
+ {
+ "epoch": 0.27422209151743104,
+ "grad_norm": 19802.947265625,
+ "learning_rate": 9.03867676894505e-05,
+ "loss": 0.4542,
+ "step": 53150
+ },
+ {
+ "epoch": 0.27448006150004384,
+ "grad_norm": 21255.974609375,
+ "learning_rate": 9.03652207379143e-05,
+ "loss": 0.4636,
+ "step": 53200
+ },
+ {
+ "epoch": 0.2747380314826567,
+ "grad_norm": 21687.16796875,
+ "learning_rate": 9.034365224020489e-05,
+ "loss": 0.4626,
+ "step": 53250
+ },
+ {
+ "epoch": 0.2749960014652695,
+ "grad_norm": 21386.275390625,
+ "learning_rate": 9.032206220783512e-05,
+ "loss": 0.4659,
+ "step": 53300
+ },
+ {
+ "epoch": 0.27525397144788233,
+ "grad_norm": 19433.888671875,
+ "learning_rate": 9.030045065232935e-05,
+ "loss": 0.4585,
+ "step": 53350
+ },
+ {
+ "epoch": 0.2755119414304951,
+ "grad_norm": 20615.021484375,
+ "learning_rate": 9.027881758522339e-05,
+ "loss": 0.4619,
+ "step": 53400
+ },
+ {
+ "epoch": 0.27576991141310797,
+ "grad_norm": 20498.369140625,
+ "learning_rate": 9.025716301806454e-05,
+ "loss": 0.4658,
+ "step": 53450
+ },
+ {
+ "epoch": 0.2760278813957208,
+ "grad_norm": 20348.955078125,
+ "learning_rate": 9.023548696241162e-05,
+ "loss": 0.4637,
+ "step": 53500
+ },
+ {
+ "epoch": 0.2762858513783336,
+ "grad_norm": 18524.3203125,
+ "learning_rate": 9.021378942983487e-05,
+ "loss": 0.4636,
+ "step": 53550
+ },
+ {
+ "epoch": 0.27654382136094646,
+ "grad_norm": 20778.064453125,
+ "learning_rate": 9.019207043191602e-05,
+ "loss": 0.4604,
+ "step": 53600
+ },
+ {
+ "epoch": 0.27680179134355926,
+ "grad_norm": 19481.369140625,
+ "learning_rate": 9.017032998024823e-05,
+ "loss": 0.4629,
+ "step": 53650
+ },
+ {
+ "epoch": 0.2770597613261721,
+ "grad_norm": 20873.8515625,
+ "learning_rate": 9.014856808643617e-05,
+ "loss": 0.4647,
+ "step": 53700
+ },
+ {
+ "epoch": 0.2773177313087849,
+ "grad_norm": 21859.05078125,
+ "learning_rate": 9.012678476209591e-05,
+ "loss": 0.4621,
+ "step": 53750
+ },
+ {
+ "epoch": 0.27757570129139775,
+ "grad_norm": 20832.587890625,
+ "learning_rate": 9.010498001885492e-05,
+ "loss": 0.463,
+ "step": 53800
+ },
+ {
+ "epoch": 0.27783367127401054,
+ "grad_norm": 18435.703125,
+ "learning_rate": 9.00831538683522e-05,
+ "loss": 0.466,
+ "step": 53850
+ },
+ {
+ "epoch": 0.2780916412566234,
+ "grad_norm": 21496.61328125,
+ "learning_rate": 9.006130632223811e-05,
+ "loss": 0.4611,
+ "step": 53900
+ },
+ {
+ "epoch": 0.2783496112392362,
+ "grad_norm": 21796.873046875,
+ "learning_rate": 9.003943739217444e-05,
+ "loss": 0.4587,
+ "step": 53950
+ },
+ {
+ "epoch": 0.27860758122184903,
+ "grad_norm": 21053.099609375,
+ "learning_rate": 9.001754708983443e-05,
+ "loss": 0.4659,
+ "step": 54000
+ },
+ {
+ "epoch": 0.2788655512044618,
+ "grad_norm": 20332.98828125,
+ "learning_rate": 8.999563542690266e-05,
+ "loss": 0.4586,
+ "step": 54050
+ },
+ {
+ "epoch": 0.2791235211870747,
+ "grad_norm": 19829.93359375,
+ "learning_rate": 8.997370241507516e-05,
+ "loss": 0.4608,
+ "step": 54100
+ },
+ {
+ "epoch": 0.27938149116968747,
+ "grad_norm": 21215.3515625,
+ "learning_rate": 8.995174806605937e-05,
+ "loss": 0.4672,
+ "step": 54150
+ },
+ {
+ "epoch": 0.2796394611523003,
+ "grad_norm": 19068.890625,
+ "learning_rate": 8.992977239157408e-05,
+ "loss": 0.4637,
+ "step": 54200
+ },
+ {
+ "epoch": 0.27989743113491317,
+ "grad_norm": 20632.857421875,
+ "learning_rate": 8.99077754033495e-05,
+ "loss": 0.4615,
+ "step": 54250
+ },
+ {
+ "epoch": 0.28015540111752596,
+ "grad_norm": 20244.943359375,
+ "learning_rate": 8.988575711312714e-05,
+ "loss": 0.4665,
+ "step": 54300
+ },
+ {
+ "epoch": 0.2804133711001388,
+ "grad_norm": 21873.34375,
+ "learning_rate": 8.986371753266001e-05,
+ "loss": 0.4636,
+ "step": 54350
+ },
+ {
+ "epoch": 0.2806713410827516,
+ "grad_norm": 18075.001953125,
+ "learning_rate": 8.984165667371236e-05,
+ "loss": 0.4626,
+ "step": 54400
+ },
+ {
+ "epoch": 0.28092931106536445,
+ "grad_norm": 19815.0546875,
+ "learning_rate": 8.981957454805987e-05,
+ "loss": 0.4535,
+ "step": 54450
+ },
+ {
+ "epoch": 0.28118728104797724,
+ "grad_norm": 22713.48046875,
+ "learning_rate": 8.979747116748955e-05,
+ "loss": 0.4592,
+ "step": 54500
+ },
+ {
+ "epoch": 0.2814452510305901,
+ "grad_norm": 23360.1953125,
+ "learning_rate": 8.977534654379976e-05,
+ "loss": 0.4646,
+ "step": 54550
+ },
+ {
+ "epoch": 0.2817032210132029,
+ "grad_norm": 21626.36328125,
+ "learning_rate": 8.975320068880018e-05,
+ "loss": 0.4644,
+ "step": 54600
+ },
+ {
+ "epoch": 0.28196119099581574,
+ "grad_norm": 20061.873046875,
+ "learning_rate": 8.973103361431184e-05,
+ "loss": 0.4674,
+ "step": 54650
+ },
+ {
+ "epoch": 0.28221916097842853,
+ "grad_norm": 21295.0625,
+ "learning_rate": 8.970884533216713e-05,
+ "loss": 0.4674,
+ "step": 54700
+ },
+ {
+ "epoch": 0.2824771309610414,
+ "grad_norm": 19434.23828125,
+ "learning_rate": 8.968663585420967e-05,
+ "loss": 0.46,
+ "step": 54750
+ },
+ {
+ "epoch": 0.28273510094365417,
+ "grad_norm": 23654.849609375,
+ "learning_rate": 8.966440519229449e-05,
+ "loss": 0.4649,
+ "step": 54800
+ },
+ {
+ "epoch": 0.282993070926267,
+ "grad_norm": 22763.603515625,
+ "learning_rate": 8.964215335828787e-05,
+ "loss": 0.4578,
+ "step": 54850
+ },
+ {
+ "epoch": 0.28325104090887987,
+ "grad_norm": 23262.849609375,
+ "learning_rate": 8.961988036406741e-05,
+ "loss": 0.4674,
+ "step": 54900
+ },
+ {
+ "epoch": 0.28350901089149266,
+ "grad_norm": 20148.380859375,
+ "learning_rate": 8.959758622152201e-05,
+ "loss": 0.4642,
+ "step": 54950
+ },
+ {
+ "epoch": 0.2837669808741055,
+ "grad_norm": 22515.548828125,
+ "learning_rate": 8.957527094255186e-05,
+ "loss": 0.4697,
+ "step": 55000
+ },
+ {
+ "epoch": 0.2837669808741055,
+ "eval_loss": 0.4508056044578552,
+ "eval_runtime": 3347.9938,
+ "eval_samples_per_second": 926.262,
+ "eval_steps_per_second": 1.809,
+ "step": 55000
+ },
+ {
+ "epoch": 0.2840249508567183,
+ "grad_norm": 21158.09375,
+ "learning_rate": 8.95529345390684e-05,
+ "loss": 0.4617,
+ "step": 55050
+ },
+ {
+ "epoch": 0.28428292083933115,
+ "grad_norm": 20892.517578125,
+ "learning_rate": 8.953057702299437e-05,
+ "loss": 0.4612,
+ "step": 55100
+ },
+ {
+ "epoch": 0.28454089082194395,
+ "grad_norm": 21489.740234375,
+ "learning_rate": 8.950819840626381e-05,
+ "loss": 0.4578,
+ "step": 55150
+ },
+ {
+ "epoch": 0.2847988608045568,
+ "grad_norm": 20703.072265625,
+ "learning_rate": 8.948579870082197e-05,
+ "loss": 0.4632,
+ "step": 55200
+ },
+ {
+ "epoch": 0.2850568307871696,
+ "grad_norm": 21731.775390625,
+ "learning_rate": 8.946337791862537e-05,
+ "loss": 0.4621,
+ "step": 55250
+ },
+ {
+ "epoch": 0.28531480076978244,
+ "grad_norm": 24507.076171875,
+ "learning_rate": 8.94409360716418e-05,
+ "loss": 0.4542,
+ "step": 55300
+ },
+ {
+ "epoch": 0.28557277075239523,
+ "grad_norm": 20686.79296875,
+ "learning_rate": 8.94184731718503e-05,
+ "loss": 0.4575,
+ "step": 55350
+ },
+ {
+ "epoch": 0.2858307407350081,
+ "grad_norm": 20055.396484375,
+ "learning_rate": 8.93959892312411e-05,
+ "loss": 0.4595,
+ "step": 55400
+ },
+ {
+ "epoch": 0.2860887107176209,
+ "grad_norm": 21203.28515625,
+ "learning_rate": 8.93734842618157e-05,
+ "loss": 0.457,
+ "step": 55450
+ },
+ {
+ "epoch": 0.2863466807002337,
+ "grad_norm": 21738.6328125,
+ "learning_rate": 8.935095827558684e-05,
+ "loss": 0.4639,
+ "step": 55500
+ },
+ {
+ "epoch": 0.2866046506828465,
+ "grad_norm": 21593.056640625,
+ "learning_rate": 8.932841128457844e-05,
+ "loss": 0.4566,
+ "step": 55550
+ },
+ {
+ "epoch": 0.28686262066545937,
+ "grad_norm": 20362.564453125,
+ "learning_rate": 8.930584330082564e-05,
+ "loss": 0.4613,
+ "step": 55600
+ },
+ {
+ "epoch": 0.2871205906480722,
+ "grad_norm": 20415.390625,
+ "learning_rate": 8.928325433637482e-05,
+ "loss": 0.4591,
+ "step": 55650
+ },
+ {
+ "epoch": 0.287378560630685,
+ "grad_norm": 21615.1953125,
+ "learning_rate": 8.926064440328348e-05,
+ "loss": 0.4645,
+ "step": 55700
+ },
+ {
+ "epoch": 0.28763653061329786,
+ "grad_norm": 19537.873046875,
+ "learning_rate": 8.92380135136204e-05,
+ "loss": 0.4595,
+ "step": 55750
+ },
+ {
+ "epoch": 0.28789450059591065,
+ "grad_norm": 21288.21484375,
+ "learning_rate": 8.921536167946552e-05,
+ "loss": 0.4565,
+ "step": 55800
+ },
+ {
+ "epoch": 0.2881524705785235,
+ "grad_norm": 25019.783203125,
+ "learning_rate": 8.919268891290992e-05,
+ "loss": 0.4635,
+ "step": 55850
+ },
+ {
+ "epoch": 0.2884104405611363,
+ "grad_norm": 23099.5625,
+ "learning_rate": 8.916999522605592e-05,
+ "loss": 0.4561,
+ "step": 55900
+ },
+ {
+ "epoch": 0.28866841054374914,
+ "grad_norm": 22477.849609375,
+ "learning_rate": 8.914728063101694e-05,
+ "loss": 0.458,
+ "step": 55950
+ },
+ {
+ "epoch": 0.28892638052636194,
+ "grad_norm": 19823.103515625,
+ "learning_rate": 8.91245451399176e-05,
+ "loss": 0.457,
+ "step": 56000
+ },
+ {
+ "epoch": 0.2891843505089748,
+ "grad_norm": 20293.353515625,
+ "learning_rate": 8.910178876489368e-05,
+ "loss": 0.4614,
+ "step": 56050
+ },
+ {
+ "epoch": 0.2894423204915876,
+ "grad_norm": 19020.892578125,
+ "learning_rate": 8.907901151809205e-05,
+ "loss": 0.4597,
+ "step": 56100
+ },
+ {
+ "epoch": 0.28970029047420043,
+ "grad_norm": 20133.603515625,
+ "learning_rate": 8.905621341167082e-05,
+ "loss": 0.4577,
+ "step": 56150
+ },
+ {
+ "epoch": 0.2899582604568132,
+ "grad_norm": 21008.95703125,
+ "learning_rate": 8.903339445779915e-05,
+ "loss": 0.4596,
+ "step": 56200
+ },
+ {
+ "epoch": 0.29021623043942607,
+ "grad_norm": 21339.892578125,
+ "learning_rate": 8.901055466865735e-05,
+ "loss": 0.4631,
+ "step": 56250
+ },
+ {
+ "epoch": 0.29047420042203886,
+ "grad_norm": 20088.455078125,
+ "learning_rate": 8.898769405643686e-05,
+ "loss": 0.4571,
+ "step": 56300
+ },
+ {
+ "epoch": 0.2907321704046517,
+ "grad_norm": 21779.341796875,
+ "learning_rate": 8.896481263334023e-05,
+ "loss": 0.4541,
+ "step": 56350
+ },
+ {
+ "epoch": 0.29099014038726456,
+ "grad_norm": 24433.103515625,
+ "learning_rate": 8.894191041158113e-05,
+ "loss": 0.4627,
+ "step": 56400
+ },
+ {
+ "epoch": 0.29124811036987736,
+ "grad_norm": 22214.70703125,
+ "learning_rate": 8.891898740338432e-05,
+ "loss": 0.4585,
+ "step": 56450
+ },
+ {
+ "epoch": 0.2915060803524902,
+ "grad_norm": 20558.955078125,
+ "learning_rate": 8.889604362098567e-05,
+ "loss": 0.4547,
+ "step": 56500
+ },
+ {
+ "epoch": 0.291764050335103,
+ "grad_norm": 22438.3828125,
+ "learning_rate": 8.88730790766321e-05,
+ "loss": 0.4581,
+ "step": 56550
+ },
+ {
+ "epoch": 0.29202202031771585,
+ "grad_norm": 22429.658203125,
+ "learning_rate": 8.885009378258164e-05,
+ "loss": 0.4556,
+ "step": 56600
+ },
+ {
+ "epoch": 0.29227999030032864,
+ "grad_norm": 18076.814453125,
+ "learning_rate": 8.882708775110342e-05,
+ "loss": 0.4571,
+ "step": 56650
+ },
+ {
+ "epoch": 0.2925379602829415,
+ "grad_norm": 19816.873046875,
+ "learning_rate": 8.88040609944776e-05,
+ "loss": 0.4584,
+ "step": 56700
+ },
+ {
+ "epoch": 0.2927959302655543,
+ "grad_norm": 20448.5234375,
+ "learning_rate": 8.878101352499542e-05,
+ "loss": 0.4575,
+ "step": 56750
+ },
+ {
+ "epoch": 0.29305390024816713,
+ "grad_norm": 19950.4609375,
+ "learning_rate": 8.875794535495915e-05,
+ "loss": 0.4558,
+ "step": 56800
+ },
+ {
+ "epoch": 0.2933118702307799,
+ "grad_norm": 20185.0625,
+ "learning_rate": 8.873485649668218e-05,
+ "loss": 0.4523,
+ "step": 56850
+ },
+ {
+ "epoch": 0.2935698402133928,
+ "grad_norm": 22338.080078125,
+ "learning_rate": 8.871174696248888e-05,
+ "loss": 0.4648,
+ "step": 56900
+ },
+ {
+ "epoch": 0.29382781019600557,
+ "grad_norm": 22531.541015625,
+ "learning_rate": 8.868861676471463e-05,
+ "loss": 0.4628,
+ "step": 56950
+ },
+ {
+ "epoch": 0.2940857801786184,
+ "grad_norm": 19558.10546875,
+ "learning_rate": 8.866546591570592e-05,
+ "loss": 0.4565,
+ "step": 57000
+ },
+ {
+ "epoch": 0.29434375016123127,
+ "grad_norm": 20166.33203125,
+ "learning_rate": 8.864229442782023e-05,
+ "loss": 0.4527,
+ "step": 57050
+ },
+ {
+ "epoch": 0.29460172014384406,
+ "grad_norm": 20262.185546875,
+ "learning_rate": 8.861910231342603e-05,
+ "loss": 0.4575,
+ "step": 57100
+ },
+ {
+ "epoch": 0.2948596901264569,
+ "grad_norm": 19107.080078125,
+ "learning_rate": 8.859588958490283e-05,
+ "loss": 0.4564,
+ "step": 57150
+ },
+ {
+ "epoch": 0.2951176601090697,
+ "grad_norm": 19690.37109375,
+ "learning_rate": 8.857265625464113e-05,
+ "loss": 0.4576,
+ "step": 57200
+ },
+ {
+ "epoch": 0.29537563009168255,
+ "grad_norm": 21793.189453125,
+ "learning_rate": 8.854940233504245e-05,
+ "loss": 0.4616,
+ "step": 57250
+ },
+ {
+ "epoch": 0.29563360007429534,
+ "grad_norm": 21543.033203125,
+ "learning_rate": 8.852612783851926e-05,
+ "loss": 0.4559,
+ "step": 57300
+ },
+ {
+ "epoch": 0.2958915700569082,
+ "grad_norm": 21455.56640625,
+ "learning_rate": 8.850283277749504e-05,
+ "loss": 0.4583,
+ "step": 57350
+ },
+ {
+ "epoch": 0.296149540039521,
+ "grad_norm": 21236.935546875,
+ "learning_rate": 8.847951716440426e-05,
+ "loss": 0.46,
+ "step": 57400
+ },
+ {
+ "epoch": 0.29640751002213384,
+ "grad_norm": 22411.130859375,
+ "learning_rate": 8.845618101169232e-05,
+ "loss": 0.4563,
+ "step": 57450
+ },
+ {
+ "epoch": 0.29666548000474663,
+ "grad_norm": 19269.26171875,
+ "learning_rate": 8.843282433181561e-05,
+ "loss": 0.4634,
+ "step": 57500
+ },
+ {
+ "epoch": 0.2969234499873595,
+ "grad_norm": 22179.669921875,
+ "learning_rate": 8.840944713724149e-05,
+ "loss": 0.4582,
+ "step": 57550
+ },
+ {
+ "epoch": 0.29718141996997227,
+ "grad_norm": 19867.076171875,
+ "learning_rate": 8.838604944044825e-05,
+ "loss": 0.4591,
+ "step": 57600
+ },
+ {
+ "epoch": 0.2974393899525851,
+ "grad_norm": 19806.09375,
+ "learning_rate": 8.836263125392511e-05,
+ "loss": 0.4571,
+ "step": 57650
+ },
+ {
+ "epoch": 0.2976973599351979,
+ "grad_norm": 21762.22265625,
+ "learning_rate": 8.833919259017225e-05,
+ "loss": 0.4526,
+ "step": 57700
+ },
+ {
+ "epoch": 0.29795532991781076,
+ "grad_norm": 21031.263671875,
+ "learning_rate": 8.83157334617008e-05,
+ "loss": 0.4577,
+ "step": 57750
+ },
+ {
+ "epoch": 0.2982132999004236,
+ "grad_norm": 22886.556640625,
+ "learning_rate": 8.829225388103276e-05,
+ "loss": 0.4553,
+ "step": 57800
+ },
+ {
+ "epoch": 0.2984712698830364,
+ "grad_norm": 19710.173828125,
+ "learning_rate": 8.826875386070108e-05,
+ "loss": 0.4556,
+ "step": 57850
+ },
+ {
+ "epoch": 0.29872923986564925,
+ "grad_norm": 20607.244140625,
+ "learning_rate": 8.824523341324963e-05,
+ "loss": 0.458,
+ "step": 57900
+ },
+ {
+ "epoch": 0.29898720984826205,
+ "grad_norm": 20672.05859375,
+ "learning_rate": 8.822169255123317e-05,
+ "loss": 0.4531,
+ "step": 57950
+ },
+ {
+ "epoch": 0.2992451798308749,
+ "grad_norm": 21375.76953125,
+ "learning_rate": 8.819813128721732e-05,
+ "loss": 0.4602,
+ "step": 58000
+ },
+ {
+ "epoch": 0.2995031498134877,
+ "grad_norm": 20848.328125,
+ "learning_rate": 8.817454963377865e-05,
+ "loss": 0.4557,
+ "step": 58050
+ },
+ {
+ "epoch": 0.29976111979610054,
+ "grad_norm": 20778.619140625,
+ "learning_rate": 8.81509476035046e-05,
+ "loss": 0.4588,
+ "step": 58100
+ },
+ {
+ "epoch": 0.30001908977871333,
+ "grad_norm": 19791.296875,
+ "learning_rate": 8.812732520899347e-05,
+ "loss": 0.4609,
+ "step": 58150
+ },
+ {
+ "epoch": 0.3002770597613262,
+ "grad_norm": 21814.482421875,
+ "learning_rate": 8.810368246285445e-05,
+ "loss": 0.4597,
+ "step": 58200
+ },
+ {
+ "epoch": 0.300535029743939,
+ "grad_norm": 22417.65625,
+ "learning_rate": 8.808001937770755e-05,
+ "loss": 0.461,
+ "step": 58250
+ },
+ {
+ "epoch": 0.3007929997265518,
+ "grad_norm": 21347.53515625,
+ "learning_rate": 8.80563359661837e-05,
+ "loss": 0.4523,
+ "step": 58300
+ },
+ {
+ "epoch": 0.3010509697091646,
+ "grad_norm": 21612.689453125,
+ "learning_rate": 8.803263224092461e-05,
+ "loss": 0.4588,
+ "step": 58350
+ },
+ {
+ "epoch": 0.30130893969177747,
+ "grad_norm": 19139.7109375,
+ "learning_rate": 8.80089082145829e-05,
+ "loss": 0.4576,
+ "step": 58400
+ },
+ {
+ "epoch": 0.3015669096743903,
+ "grad_norm": 21629.78125,
+ "learning_rate": 8.798516389982197e-05,
+ "loss": 0.4514,
+ "step": 58450
+ },
+ {
+ "epoch": 0.3018248796570031,
+ "grad_norm": 20307.630859375,
+ "learning_rate": 8.79613993093161e-05,
+ "loss": 0.4606,
+ "step": 58500
+ },
+ {
+ "epoch": 0.30208284963961596,
+ "grad_norm": 17832.3359375,
+ "learning_rate": 8.793761445575037e-05,
+ "loss": 0.4654,
+ "step": 58550
+ },
+ {
+ "epoch": 0.30234081962222875,
+ "grad_norm": 19975.20703125,
+ "learning_rate": 8.791380935182065e-05,
+ "loss": 0.4519,
+ "step": 58600
+ },
+ {
+ "epoch": 0.3025987896048416,
+ "grad_norm": 23387.681640625,
+ "learning_rate": 8.788998401023365e-05,
+ "loss": 0.4576,
+ "step": 58650
+ },
+ {
+ "epoch": 0.3028567595874544,
+ "grad_norm": 18704.669921875,
+ "learning_rate": 8.78661384437069e-05,
+ "loss": 0.4634,
+ "step": 58700
+ },
+ {
+ "epoch": 0.30311472957006724,
+ "grad_norm": 21739.806640625,
+ "learning_rate": 8.784227266496868e-05,
+ "loss": 0.4471,
+ "step": 58750
+ },
+ {
+ "epoch": 0.30337269955268004,
+ "grad_norm": 22190.74609375,
+ "learning_rate": 8.781838668675806e-05,
+ "loss": 0.4508,
+ "step": 58800
+ },
+ {
+ "epoch": 0.3036306695352929,
+ "grad_norm": 19186.9609375,
+ "learning_rate": 8.779448052182495e-05,
+ "loss": 0.4575,
+ "step": 58850
+ },
+ {
+ "epoch": 0.3038886395179057,
+ "grad_norm": 21925.8984375,
+ "learning_rate": 8.777055418293e-05,
+ "loss": 0.4614,
+ "step": 58900
+ },
+ {
+ "epoch": 0.3041466095005185,
+ "grad_norm": 21280.16796875,
+ "learning_rate": 8.774660768284459e-05,
+ "loss": 0.4621,
+ "step": 58950
+ },
+ {
+ "epoch": 0.3044045794831313,
+ "grad_norm": 19872.3828125,
+ "learning_rate": 8.772264103435094e-05,
+ "loss": 0.4617,
+ "step": 59000
+ },
+ {
+ "epoch": 0.30466254946574417,
+ "grad_norm": 17518.58984375,
+ "learning_rate": 8.769865425024195e-05,
+ "loss": 0.4548,
+ "step": 59050
+ },
+ {
+ "epoch": 0.30492051944835696,
+ "grad_norm": 25605.537109375,
+ "learning_rate": 8.767464734332131e-05,
+ "loss": 0.4532,
+ "step": 59100
+ },
+ {
+ "epoch": 0.3051784894309698,
+ "grad_norm": 20151.53515625,
+ "learning_rate": 8.765062032640346e-05,
+ "loss": 0.4558,
+ "step": 59150
+ },
+ {
+ "epoch": 0.30543645941358266,
+ "grad_norm": 19346.048828125,
+ "learning_rate": 8.762657321231353e-05,
+ "loss": 0.4624,
+ "step": 59200
+ },
+ {
+ "epoch": 0.30569442939619546,
+ "grad_norm": 21447.115234375,
+ "learning_rate": 8.760250601388741e-05,
+ "loss": 0.4632,
+ "step": 59250
+ },
+ {
+ "epoch": 0.3059523993788083,
+ "grad_norm": 19053.896484375,
+ "learning_rate": 8.757841874397172e-05,
+ "loss": 0.454,
+ "step": 59300
+ },
+ {
+ "epoch": 0.3062103693614211,
+ "grad_norm": 20928.8515625,
+ "learning_rate": 8.755431141542376e-05,
+ "loss": 0.4509,
+ "step": 59350
+ },
+ {
+ "epoch": 0.30646833934403395,
+ "grad_norm": 20900.40234375,
+ "learning_rate": 8.753018404111157e-05,
+ "loss": 0.4523,
+ "step": 59400
+ },
+ {
+ "epoch": 0.30672630932664674,
+ "grad_norm": 19776.572265625,
+ "learning_rate": 8.750603663391385e-05,
+ "loss": 0.458,
+ "step": 59450
+ },
+ {
+ "epoch": 0.3069842793092596,
+ "grad_norm": 21503.505859375,
+ "learning_rate": 8.748186920672005e-05,
+ "loss": 0.4496,
+ "step": 59500
+ },
+ {
+ "epoch": 0.3072422492918724,
+ "grad_norm": 20588.5078125,
+ "learning_rate": 8.745768177243027e-05,
+ "loss": 0.4578,
+ "step": 59550
+ },
+ {
+ "epoch": 0.30750021927448523,
+ "grad_norm": 20516.150390625,
+ "learning_rate": 8.743347434395528e-05,
+ "loss": 0.46,
+ "step": 59600
+ },
+ {
+ "epoch": 0.307758189257098,
+ "grad_norm": 20487.498046875,
+ "learning_rate": 8.740924693421655e-05,
+ "loss": 0.4574,
+ "step": 59650
+ },
+ {
+ "epoch": 0.3080161592397109,
+ "grad_norm": 21070.3671875,
+ "learning_rate": 8.738499955614619e-05,
+ "loss": 0.4564,
+ "step": 59700
+ },
+ {
+ "epoch": 0.30827412922232367,
+ "grad_norm": 19067.427734375,
+ "learning_rate": 8.736073222268697e-05,
+ "loss": 0.4523,
+ "step": 59750
+ },
+ {
+ "epoch": 0.3085320992049365,
+ "grad_norm": 22084.68359375,
+ "learning_rate": 8.733644494679236e-05,
+ "loss": 0.4558,
+ "step": 59800
+ },
+ {
+ "epoch": 0.3087900691875493,
+ "grad_norm": 22324.9140625,
+ "learning_rate": 8.731213774142639e-05,
+ "loss": 0.4585,
+ "step": 59850
+ },
+ {
+ "epoch": 0.30904803917016216,
+ "grad_norm": 19219.47265625,
+ "learning_rate": 8.728781061956383e-05,
+ "loss": 0.4571,
+ "step": 59900
+ },
+ {
+ "epoch": 0.309306009152775,
+ "grad_norm": 20598.125,
+ "learning_rate": 8.726346359418998e-05,
+ "loss": 0.4581,
+ "step": 59950
+ },
+ {
+ "epoch": 0.3095639791353878,
+ "grad_norm": 22155.720703125,
+ "learning_rate": 8.723909667830082e-05,
+ "loss": 0.4578,
+ "step": 60000
+ },
+ {
+ "epoch": 0.3095639791353878,
+ "eval_loss": 0.44494956731796265,
+ "eval_runtime": 3261.5111,
+ "eval_samples_per_second": 950.823,
+ "eval_steps_per_second": 1.857,
+ "step": 60000
+ },
+ {
+ "epoch": 0.30982194911800065,
+ "grad_norm": 22012.822265625,
+ "learning_rate": 8.721470988490297e-05,
+ "loss": 0.4533,
+ "step": 60050
+ },
+ {
+ "epoch": 0.31007991910061344,
+ "grad_norm": 20934.453125,
+ "learning_rate": 8.719030322701358e-05,
+ "loss": 0.4538,
+ "step": 60100
+ },
+ {
+ "epoch": 0.3103378890832263,
+ "grad_norm": 20173.20703125,
+ "learning_rate": 8.716587671766049e-05,
+ "loss": 0.4559,
+ "step": 60150
+ },
+ {
+ "epoch": 0.3105958590658391,
+ "grad_norm": 19343.833984375,
+ "learning_rate": 8.714143036988208e-05,
+ "loss": 0.4579,
+ "step": 60200
+ },
+ {
+ "epoch": 0.31085382904845194,
+ "grad_norm": 20720.435546875,
+ "learning_rate": 8.711696419672734e-05,
+ "loss": 0.4529,
+ "step": 60250
+ },
+ {
+ "epoch": 0.31111179903106473,
+ "grad_norm": 22050.85546875,
+ "learning_rate": 8.709247821125583e-05,
+ "loss": 0.4505,
+ "step": 60300
+ },
+ {
+ "epoch": 0.3113697690136776,
+ "grad_norm": 22470.55078125,
+ "learning_rate": 8.706797242653773e-05,
+ "loss": 0.4616,
+ "step": 60350
+ },
+ {
+ "epoch": 0.31162773899629037,
+ "grad_norm": 21057.978515625,
+ "learning_rate": 8.70434468556537e-05,
+ "loss": 0.4568,
+ "step": 60400
+ },
+ {
+ "epoch": 0.3118857089789032,
+ "grad_norm": 21035.34375,
+ "learning_rate": 8.701890151169507e-05,
+ "loss": 0.4551,
+ "step": 60450
+ },
+ {
+ "epoch": 0.312143678961516,
+ "grad_norm": 20412.056640625,
+ "learning_rate": 8.699433640776363e-05,
+ "loss": 0.4521,
+ "step": 60500
+ },
+ {
+ "epoch": 0.31240164894412886,
+ "grad_norm": 19888.26953125,
+ "learning_rate": 8.696975155697175e-05,
+ "loss": 0.4565,
+ "step": 60550
+ },
+ {
+ "epoch": 0.3126596189267417,
+ "grad_norm": 22491.900390625,
+ "learning_rate": 8.694514697244238e-05,
+ "loss": 0.4578,
+ "step": 60600
+ },
+ {
+ "epoch": 0.3129175889093545,
+ "grad_norm": 20026.357421875,
+ "learning_rate": 8.692052266730897e-05,
+ "loss": 0.4554,
+ "step": 60650
+ },
+ {
+ "epoch": 0.31317555889196735,
+ "grad_norm": 22979.109375,
+ "learning_rate": 8.689587865471547e-05,
+ "loss": 0.461,
+ "step": 60700
+ },
+ {
+ "epoch": 0.31343352887458015,
+ "grad_norm": 21558.291015625,
+ "learning_rate": 8.68712149478164e-05,
+ "loss": 0.4546,
+ "step": 60750
+ },
+ {
+ "epoch": 0.313691498857193,
+ "grad_norm": 22115.384765625,
+ "learning_rate": 8.684653155977676e-05,
+ "loss": 0.4518,
+ "step": 60800
+ },
+ {
+ "epoch": 0.3139494688398058,
+ "grad_norm": 21422.41015625,
+ "learning_rate": 8.682182850377205e-05,
+ "loss": 0.4602,
+ "step": 60850
+ },
+ {
+ "epoch": 0.31420743882241864,
+ "grad_norm": 21101.02734375,
+ "learning_rate": 8.679710579298832e-05,
+ "loss": 0.4579,
+ "step": 60900
+ },
+ {
+ "epoch": 0.31446540880503143,
+ "grad_norm": 18844.361328125,
+ "learning_rate": 8.677236344062203e-05,
+ "loss": 0.4569,
+ "step": 60950
+ },
+ {
+ "epoch": 0.3147233787876443,
+ "grad_norm": 20492.796875,
+ "learning_rate": 8.67476014598802e-05,
+ "loss": 0.4542,
+ "step": 61000
+ },
+ {
+ "epoch": 0.3149813487702571,
+ "grad_norm": 28102.55078125,
+ "learning_rate": 8.67228198639803e-05,
+ "loss": 0.4516,
+ "step": 61050
+ },
+ {
+ "epoch": 0.3152393187528699,
+ "grad_norm": 20697.494140625,
+ "learning_rate": 8.669801866615024e-05,
+ "loss": 0.4551,
+ "step": 61100
+ },
+ {
+ "epoch": 0.3154972887354827,
+ "grad_norm": 20726.90625,
+ "learning_rate": 8.667319787962842e-05,
+ "loss": 0.4576,
+ "step": 61150
+ },
+ {
+ "epoch": 0.31575525871809557,
+ "grad_norm": 20007.04296875,
+ "learning_rate": 8.664835751766371e-05,
+ "loss": 0.4544,
+ "step": 61200
+ },
+ {
+ "epoch": 0.31601322870070836,
+ "grad_norm": 23061.224609375,
+ "learning_rate": 8.662349759351542e-05,
+ "loss": 0.458,
+ "step": 61250
+ },
+ {
+ "epoch": 0.3162711986833212,
+ "grad_norm": 19895.3125,
+ "learning_rate": 8.65986181204533e-05,
+ "loss": 0.4555,
+ "step": 61300
+ },
+ {
+ "epoch": 0.31652916866593406,
+ "grad_norm": 22702.5234375,
+ "learning_rate": 8.65737191117575e-05,
+ "loss": 0.4586,
+ "step": 61350
+ },
+ {
+ "epoch": 0.31678713864854685,
+ "grad_norm": 20045.404296875,
+ "learning_rate": 8.654880058071866e-05,
+ "loss": 0.4583,
+ "step": 61400
+ },
+ {
+ "epoch": 0.3170451086311597,
+ "grad_norm": 21180.455078125,
+ "learning_rate": 8.652386254063778e-05,
+ "loss": 0.4594,
+ "step": 61450
+ },
+ {
+ "epoch": 0.3173030786137725,
+ "grad_norm": 19104.767578125,
+ "learning_rate": 8.649890500482633e-05,
+ "loss": 0.4532,
+ "step": 61500
+ },
+ {
+ "epoch": 0.31756104859638534,
+ "grad_norm": 23137.869140625,
+ "learning_rate": 8.647392798660613e-05,
+ "loss": 0.4535,
+ "step": 61550
+ },
+ {
+ "epoch": 0.31781901857899814,
+ "grad_norm": 21784.001953125,
+ "learning_rate": 8.644893149930949e-05,
+ "loss": 0.4518,
+ "step": 61600
+ },
+ {
+ "epoch": 0.318076988561611,
+ "grad_norm": 20489.796875,
+ "learning_rate": 8.642391555627897e-05,
+ "loss": 0.4572,
+ "step": 61650
+ },
+ {
+ "epoch": 0.3183349585442238,
+ "grad_norm": 21743.728515625,
+ "learning_rate": 8.639888017086764e-05,
+ "loss": 0.4601,
+ "step": 61700
+ },
+ {
+ "epoch": 0.3185929285268366,
+ "grad_norm": 21714.6171875,
+ "learning_rate": 8.63738253564389e-05,
+ "loss": 0.4597,
+ "step": 61750
+ },
+ {
+ "epoch": 0.3188508985094494,
+ "grad_norm": 19896.208984375,
+ "learning_rate": 8.634875112636653e-05,
+ "loss": 0.4532,
+ "step": 61800
+ },
+ {
+ "epoch": 0.31910886849206227,
+ "grad_norm": 22215.173828125,
+ "learning_rate": 8.632365749403465e-05,
+ "loss": 0.4532,
+ "step": 61850
+ },
+ {
+ "epoch": 0.31936683847467506,
+ "grad_norm": 22466.958984375,
+ "learning_rate": 8.629854447283778e-05,
+ "loss": 0.4539,
+ "step": 61900
+ },
+ {
+ "epoch": 0.3196248084572879,
+ "grad_norm": 21345.197265625,
+ "learning_rate": 8.627341207618073e-05,
+ "loss": 0.4551,
+ "step": 61950
+ },
+ {
+ "epoch": 0.3198827784399007,
+ "grad_norm": 20988.8203125,
+ "learning_rate": 8.624826031747872e-05,
+ "loss": 0.4593,
+ "step": 62000
+ },
+ {
+ "epoch": 0.32014074842251355,
+ "grad_norm": 23295.70703125,
+ "learning_rate": 8.622308921015726e-05,
+ "loss": 0.4547,
+ "step": 62050
+ },
+ {
+ "epoch": 0.3203987184051264,
+ "grad_norm": 22620.431640625,
+ "learning_rate": 8.619789876765221e-05,
+ "loss": 0.4601,
+ "step": 62100
+ },
+ {
+ "epoch": 0.3206566883877392,
+ "grad_norm": 21914.44140625,
+ "learning_rate": 8.61726890034097e-05,
+ "loss": 0.4474,
+ "step": 62150
+ },
+ {
+ "epoch": 0.32091465837035205,
+ "grad_norm": 20521.265625,
+ "learning_rate": 8.614745993088626e-05,
+ "loss": 0.4565,
+ "step": 62200
+ },
+ {
+ "epoch": 0.32117262835296484,
+ "grad_norm": 22810.072265625,
+ "learning_rate": 8.612221156354868e-05,
+ "loss": 0.453,
+ "step": 62250
+ },
+ {
+ "epoch": 0.3214305983355777,
+ "grad_norm": 20862.349609375,
+ "learning_rate": 8.609694391487402e-05,
+ "loss": 0.4543,
+ "step": 62300
+ },
+ {
+ "epoch": 0.3216885683181905,
+ "grad_norm": 22115.298828125,
+ "learning_rate": 8.607165699834967e-05,
+ "loss": 0.453,
+ "step": 62350
+ },
+ {
+ "epoch": 0.32194653830080333,
+ "grad_norm": 22504.859375,
+ "learning_rate": 8.60463508274733e-05,
+ "loss": 0.4552,
+ "step": 62400
+ },
+ {
+ "epoch": 0.3222045082834161,
+ "grad_norm": 21758.9453125,
+ "learning_rate": 8.602102541575286e-05,
+ "loss": 0.4526,
+ "step": 62450
+ },
+ {
+ "epoch": 0.322462478266029,
+ "grad_norm": 20388.23828125,
+ "learning_rate": 8.599568077670654e-05,
+ "loss": 0.4522,
+ "step": 62500
+ },
+ {
+ "epoch": 0.32272044824864177,
+ "grad_norm": 22393.857421875,
+ "learning_rate": 8.597031692386286e-05,
+ "loss": 0.4457,
+ "step": 62550
+ },
+ {
+ "epoch": 0.3229784182312546,
+ "grad_norm": 22233.978515625,
+ "learning_rate": 8.594493387076052e-05,
+ "loss": 0.449,
+ "step": 62600
+ },
+ {
+ "epoch": 0.3232363882138674,
+ "grad_norm": 19831.12109375,
+ "learning_rate": 8.591953163094852e-05,
+ "loss": 0.4556,
+ "step": 62650
+ },
+ {
+ "epoch": 0.32349435819648026,
+ "grad_norm": 19109.783203125,
+ "learning_rate": 8.589411021798608e-05,
+ "loss": 0.4552,
+ "step": 62700
+ },
+ {
+ "epoch": 0.3237523281790931,
+ "grad_norm": 23053.642578125,
+ "learning_rate": 8.586866964544265e-05,
+ "loss": 0.4552,
+ "step": 62750
+ },
+ {
+ "epoch": 0.3240102981617059,
+ "grad_norm": 17938.240234375,
+ "learning_rate": 8.584320992689791e-05,
+ "loss": 0.4512,
+ "step": 62800
+ },
+ {
+ "epoch": 0.32426826814431875,
+ "grad_norm": 19569.431640625,
+ "learning_rate": 8.581773107594179e-05,
+ "loss": 0.4557,
+ "step": 62850
+ },
+ {
+ "epoch": 0.32452623812693154,
+ "grad_norm": 19247.82421875,
+ "learning_rate": 8.579223310617439e-05,
+ "loss": 0.4599,
+ "step": 62900
+ },
+ {
+ "epoch": 0.3247842081095444,
+ "grad_norm": 21565.8671875,
+ "learning_rate": 8.576671603120603e-05,
+ "loss": 0.4573,
+ "step": 62950
+ },
+ {
+ "epoch": 0.3250421780921572,
+ "grad_norm": 19029.005859375,
+ "learning_rate": 8.574117986465723e-05,
+ "loss": 0.455,
+ "step": 63000
+ },
+ {
+ "epoch": 0.32530014807477003,
+ "grad_norm": 21574.626953125,
+ "learning_rate": 8.57156246201587e-05,
+ "loss": 0.4512,
+ "step": 63050
+ },
+ {
+ "epoch": 0.32555811805738283,
+ "grad_norm": 21181.8203125,
+ "learning_rate": 8.569005031135136e-05,
+ "loss": 0.4513,
+ "step": 63100
+ },
+ {
+ "epoch": 0.3258160880399957,
+ "grad_norm": 22689.93359375,
+ "learning_rate": 8.566445695188624e-05,
+ "loss": 0.4515,
+ "step": 63150
+ },
+ {
+ "epoch": 0.32607405802260847,
+ "grad_norm": 22001.9921875,
+ "learning_rate": 8.563884455542461e-05,
+ "loss": 0.4459,
+ "step": 63200
+ },
+ {
+ "epoch": 0.3263320280052213,
+ "grad_norm": 20342.96875,
+ "learning_rate": 8.561321313563786e-05,
+ "loss": 0.4526,
+ "step": 63250
+ },
+ {
+ "epoch": 0.3265899979878341,
+ "grad_norm": 20673.75390625,
+ "learning_rate": 8.558756270620756e-05,
+ "loss": 0.4581,
+ "step": 63300
+ },
+ {
+ "epoch": 0.32684796797044696,
+ "grad_norm": 23113.490234375,
+ "learning_rate": 8.556189328082538e-05,
+ "loss": 0.4525,
+ "step": 63350
+ },
+ {
+ "epoch": 0.32710593795305976,
+ "grad_norm": 21878.384765625,
+ "learning_rate": 8.55362048731932e-05,
+ "loss": 0.4536,
+ "step": 63400
+ },
+ {
+ "epoch": 0.3273639079356726,
+ "grad_norm": 22787.79296875,
+ "learning_rate": 8.551049749702297e-05,
+ "loss": 0.4586,
+ "step": 63450
+ },
+ {
+ "epoch": 0.32762187791828545,
+ "grad_norm": 20422.0625,
+ "learning_rate": 8.548477116603679e-05,
+ "loss": 0.4496,
+ "step": 63500
+ },
+ {
+ "epoch": 0.32787984790089825,
+ "grad_norm": 21936.8828125,
+ "learning_rate": 8.54590258939669e-05,
+ "loss": 0.4509,
+ "step": 63550
+ },
+ {
+ "epoch": 0.3281378178835111,
+ "grad_norm": 21049.275390625,
+ "learning_rate": 8.54332616945556e-05,
+ "loss": 0.4514,
+ "step": 63600
+ },
+ {
+ "epoch": 0.3283957878661239,
+ "grad_norm": 22976.1015625,
+ "learning_rate": 8.540747858155533e-05,
+ "loss": 0.4611,
+ "step": 63650
+ },
+ {
+ "epoch": 0.32865375784873674,
+ "grad_norm": 21968.18359375,
+ "learning_rate": 8.538167656872861e-05,
+ "loss": 0.4557,
+ "step": 63700
+ },
+ {
+ "epoch": 0.32891172783134953,
+ "grad_norm": 22231.755859375,
+ "learning_rate": 8.53558556698481e-05,
+ "loss": 0.4556,
+ "step": 63750
+ },
+ {
+ "epoch": 0.3291696978139624,
+ "grad_norm": 21183.978515625,
+ "learning_rate": 8.533001589869643e-05,
+ "loss": 0.4479,
+ "step": 63800
+ },
+ {
+ "epoch": 0.3294276677965752,
+ "grad_norm": 23931.5234375,
+ "learning_rate": 8.530415726906642e-05,
+ "loss": 0.4533,
+ "step": 63850
+ },
+ {
+ "epoch": 0.329685637779188,
+ "grad_norm": 21073.62890625,
+ "learning_rate": 8.527827979476087e-05,
+ "loss": 0.4577,
+ "step": 63900
+ },
+ {
+ "epoch": 0.3299436077618008,
+ "grad_norm": 19957.09375,
+ "learning_rate": 8.525238348959268e-05,
+ "loss": 0.4486,
+ "step": 63950
+ },
+ {
+ "epoch": 0.33020157774441367,
+ "grad_norm": 18999.962890625,
+ "learning_rate": 8.522646836738482e-05,
+ "loss": 0.4525,
+ "step": 64000
+ },
+ {
+ "epoch": 0.33045954772702646,
+ "grad_norm": 24102.1640625,
+ "learning_rate": 8.520053444197026e-05,
+ "loss": 0.4545,
+ "step": 64050
+ },
+ {
+ "epoch": 0.3307175177096393,
+ "grad_norm": 20205.65234375,
+ "learning_rate": 8.517458172719203e-05,
+ "loss": 0.4539,
+ "step": 64100
+ },
+ {
+ "epoch": 0.33097548769225216,
+ "grad_norm": 24099.8203125,
+ "learning_rate": 8.514861023690321e-05,
+ "loss": 0.4465,
+ "step": 64150
+ },
+ {
+ "epoch": 0.33123345767486495,
+ "grad_norm": 19802.203125,
+ "learning_rate": 8.512261998496685e-05,
+ "loss": 0.4546,
+ "step": 64200
+ },
+ {
+ "epoch": 0.3314914276574778,
+ "grad_norm": 23137.609375,
+ "learning_rate": 8.509661098525603e-05,
+ "loss": 0.4539,
+ "step": 64250
+ },
+ {
+ "epoch": 0.3317493976400906,
+ "grad_norm": 23578.609375,
+ "learning_rate": 8.507058325165391e-05,
+ "loss": 0.4513,
+ "step": 64300
+ },
+ {
+ "epoch": 0.33200736762270344,
+ "grad_norm": 19172.0859375,
+ "learning_rate": 8.504453679805353e-05,
+ "loss": 0.456,
+ "step": 64350
+ },
+ {
+ "epoch": 0.33226533760531624,
+ "grad_norm": 19165.775390625,
+ "learning_rate": 8.5018471638358e-05,
+ "loss": 0.4578,
+ "step": 64400
+ },
+ {
+ "epoch": 0.3325233075879291,
+ "grad_norm": 18070.72265625,
+ "learning_rate": 8.49923877864804e-05,
+ "loss": 0.4608,
+ "step": 64450
+ },
+ {
+ "epoch": 0.3327812775705419,
+ "grad_norm": 20918.525390625,
+ "learning_rate": 8.49662852563438e-05,
+ "loss": 0.4526,
+ "step": 64500
+ },
+ {
+ "epoch": 0.3330392475531547,
+ "grad_norm": 21165.05078125,
+ "learning_rate": 8.494016406188121e-05,
+ "loss": 0.4503,
+ "step": 64550
+ },
+ {
+ "epoch": 0.3332972175357675,
+ "grad_norm": 19273.013671875,
+ "learning_rate": 8.491402421703562e-05,
+ "loss": 0.4572,
+ "step": 64600
+ },
+ {
+ "epoch": 0.33355518751838037,
+ "grad_norm": 21221.681640625,
+ "learning_rate": 8.488786573575998e-05,
+ "loss": 0.456,
+ "step": 64650
+ },
+ {
+ "epoch": 0.33381315750099316,
+ "grad_norm": 19485.8125,
+ "learning_rate": 8.486168863201716e-05,
+ "loss": 0.4423,
+ "step": 64700
+ },
+ {
+ "epoch": 0.334071127483606,
+ "grad_norm": 23241.580078125,
+ "learning_rate": 8.483549291978001e-05,
+ "loss": 0.4531,
+ "step": 64750
+ },
+ {
+ "epoch": 0.3343290974662188,
+ "grad_norm": 21281.111328125,
+ "learning_rate": 8.48092786130313e-05,
+ "loss": 0.452,
+ "step": 64800
+ },
+ {
+ "epoch": 0.33458706744883165,
+ "grad_norm": 21610.2578125,
+ "learning_rate": 8.47830457257637e-05,
+ "loss": 0.4488,
+ "step": 64850
+ },
+ {
+ "epoch": 0.3348450374314445,
+ "grad_norm": 19343.466796875,
+ "learning_rate": 8.475679427197982e-05,
+ "loss": 0.4514,
+ "step": 64900
+ },
+ {
+ "epoch": 0.3351030074140573,
+ "grad_norm": 19489.1875,
+ "learning_rate": 8.473052426569219e-05,
+ "loss": 0.447,
+ "step": 64950
+ },
+ {
+ "epoch": 0.33536097739667015,
+ "grad_norm": 24805.84765625,
+ "learning_rate": 8.470423572092323e-05,
+ "loss": 0.4594,
+ "step": 65000
+ },
+ {
+ "epoch": 0.33536097739667015,
+ "eval_loss": 0.440469890832901,
+ "eval_runtime": 3318.76,
+ "eval_samples_per_second": 934.421,
+ "eval_steps_per_second": 1.825,
+ "step": 65000
+ },
+ {
+ "epoch": 0.33561894737928294,
+ "grad_norm": 22912.732421875,
+ "learning_rate": 8.467792865170525e-05,
+ "loss": 0.4435,
+ "step": 65050
+ },
+ {
+ "epoch": 0.3358769173618958,
+ "grad_norm": 19958.994140625,
+ "learning_rate": 8.465160307208045e-05,
+ "loss": 0.4588,
+ "step": 65100
+ },
+ {
+ "epoch": 0.3361348873445086,
+ "grad_norm": 20914.193359375,
+ "learning_rate": 8.462525899610092e-05,
+ "loss": 0.4497,
+ "step": 65150
+ },
+ {
+ "epoch": 0.33639285732712143,
+ "grad_norm": 20505.814453125,
+ "learning_rate": 8.459889643782861e-05,
+ "loss": 0.4569,
+ "step": 65200
+ },
+ {
+ "epoch": 0.3366508273097342,
+ "grad_norm": 19486.068359375,
+ "learning_rate": 8.457251541133535e-05,
+ "loss": 0.4505,
+ "step": 65250
+ },
+ {
+ "epoch": 0.3369087972923471,
+ "grad_norm": 21967.84765625,
+ "learning_rate": 8.454611593070284e-05,
+ "loss": 0.4556,
+ "step": 65300
+ },
+ {
+ "epoch": 0.33716676727495987,
+ "grad_norm": 21949.767578125,
+ "learning_rate": 8.451969801002258e-05,
+ "loss": 0.4491,
+ "step": 65350
+ },
+ {
+ "epoch": 0.3374247372575727,
+ "grad_norm": 19765.14453125,
+ "learning_rate": 8.449326166339595e-05,
+ "loss": 0.4507,
+ "step": 65400
+ },
+ {
+ "epoch": 0.3376827072401855,
+ "grad_norm": 21396.982421875,
+ "learning_rate": 8.446680690493417e-05,
+ "loss": 0.4548,
+ "step": 65450
+ },
+ {
+ "epoch": 0.33794067722279836,
+ "grad_norm": 22511.8359375,
+ "learning_rate": 8.444033374875828e-05,
+ "loss": 0.454,
+ "step": 65500
+ },
+ {
+ "epoch": 0.33819864720541115,
+ "grad_norm": 21264.076171875,
+ "learning_rate": 8.441384220899912e-05,
+ "loss": 0.4486,
+ "step": 65550
+ },
+ {
+ "epoch": 0.338456617188024,
+ "grad_norm": 20736.046875,
+ "learning_rate": 8.438733229979741e-05,
+ "loss": 0.4505,
+ "step": 65600
+ },
+ {
+ "epoch": 0.33871458717063685,
+ "grad_norm": 20183.8359375,
+ "learning_rate": 8.436080403530356e-05,
+ "loss": 0.4485,
+ "step": 65650
+ },
+ {
+ "epoch": 0.33897255715324964,
+ "grad_norm": 21947.3671875,
+ "learning_rate": 8.433425742967787e-05,
+ "loss": 0.4499,
+ "step": 65700
+ },
+ {
+ "epoch": 0.3392305271358625,
+ "grad_norm": 22621.236328125,
+ "learning_rate": 8.430769249709042e-05,
+ "loss": 0.4503,
+ "step": 65750
+ },
+ {
+ "epoch": 0.3394884971184753,
+ "grad_norm": 21537.947265625,
+ "learning_rate": 8.428110925172103e-05,
+ "loss": 0.4634,
+ "step": 65800
+ },
+ {
+ "epoch": 0.33974646710108813,
+ "grad_norm": 20869.759765625,
+ "learning_rate": 8.425450770775936e-05,
+ "loss": 0.4504,
+ "step": 65850
+ },
+ {
+ "epoch": 0.34000443708370093,
+ "grad_norm": 20865.12109375,
+ "learning_rate": 8.422788787940477e-05,
+ "loss": 0.4509,
+ "step": 65900
+ },
+ {
+ "epoch": 0.3402624070663138,
+ "grad_norm": 23897.974609375,
+ "learning_rate": 8.42012497808664e-05,
+ "loss": 0.4512,
+ "step": 65950
+ },
+ {
+ "epoch": 0.34052037704892657,
+ "grad_norm": 23978.56640625,
+ "learning_rate": 8.417459342636318e-05,
+ "loss": 0.4513,
+ "step": 66000
+ },
+ {
+ "epoch": 0.3407783470315394,
+ "grad_norm": 22806.99609375,
+ "learning_rate": 8.414791883012374e-05,
+ "loss": 0.4468,
+ "step": 66050
+ },
+ {
+ "epoch": 0.3410363170141522,
+ "grad_norm": 20348.841796875,
+ "learning_rate": 8.412122600638646e-05,
+ "loss": 0.4484,
+ "step": 66100
+ },
+ {
+ "epoch": 0.34129428699676506,
+ "grad_norm": 21868.353515625,
+ "learning_rate": 8.409451496939945e-05,
+ "loss": 0.4601,
+ "step": 66150
+ },
+ {
+ "epoch": 0.34155225697937786,
+ "grad_norm": 20312.36328125,
+ "learning_rate": 8.406778573342055e-05,
+ "loss": 0.4485,
+ "step": 66200
+ },
+ {
+ "epoch": 0.3418102269619907,
+ "grad_norm": 25603.419921875,
+ "learning_rate": 8.404103831271733e-05,
+ "loss": 0.4487,
+ "step": 66250
+ },
+ {
+ "epoch": 0.34206819694460355,
+ "grad_norm": 21330.416015625,
+ "learning_rate": 8.4014272721567e-05,
+ "loss": 0.449,
+ "step": 66300
+ },
+ {
+ "epoch": 0.34232616692721635,
+ "grad_norm": 20045.4453125,
+ "learning_rate": 8.398748897425656e-05,
+ "loss": 0.447,
+ "step": 66350
+ },
+ {
+ "epoch": 0.3425841369098292,
+ "grad_norm": 21575.642578125,
+ "learning_rate": 8.396068708508262e-05,
+ "loss": 0.4495,
+ "step": 66400
+ },
+ {
+ "epoch": 0.342842106892442,
+ "grad_norm": 20396.5390625,
+ "learning_rate": 8.393386706835154e-05,
+ "loss": 0.4478,
+ "step": 66450
+ },
+ {
+ "epoch": 0.34310007687505484,
+ "grad_norm": 20366.8046875,
+ "learning_rate": 8.390702893837929e-05,
+ "loss": 0.4531,
+ "step": 66500
+ },
+ {
+ "epoch": 0.34335804685766763,
+ "grad_norm": 23514.521484375,
+ "learning_rate": 8.388017270949158e-05,
+ "loss": 0.4496,
+ "step": 66550
+ },
+ {
+ "epoch": 0.3436160168402805,
+ "grad_norm": 23656.869140625,
+ "learning_rate": 8.385329839602372e-05,
+ "loss": 0.448,
+ "step": 66600
+ },
+ {
+ "epoch": 0.3438739868228933,
+ "grad_norm": 23712.216796875,
+ "learning_rate": 8.382640601232071e-05,
+ "loss": 0.4502,
+ "step": 66650
+ },
+ {
+ "epoch": 0.3441319568055061,
+ "grad_norm": 23220.240234375,
+ "learning_rate": 8.379949557273717e-05,
+ "loss": 0.4469,
+ "step": 66700
+ },
+ {
+ "epoch": 0.3443899267881189,
+ "grad_norm": 21469.244140625,
+ "learning_rate": 8.37725670916374e-05,
+ "loss": 0.4506,
+ "step": 66750
+ },
+ {
+ "epoch": 0.34464789677073177,
+ "grad_norm": 19195.431640625,
+ "learning_rate": 8.374562058339528e-05,
+ "loss": 0.4494,
+ "step": 66800
+ },
+ {
+ "epoch": 0.34490586675334456,
+ "grad_norm": 21464.130859375,
+ "learning_rate": 8.371865606239433e-05,
+ "loss": 0.4552,
+ "step": 66850
+ },
+ {
+ "epoch": 0.3451638367359574,
+ "grad_norm": 23449.76953125,
+ "learning_rate": 8.36916735430277e-05,
+ "loss": 0.4513,
+ "step": 66900
+ },
+ {
+ "epoch": 0.3454218067185702,
+ "grad_norm": 20593.39453125,
+ "learning_rate": 8.366467303969814e-05,
+ "loss": 0.447,
+ "step": 66950
+ },
+ {
+ "epoch": 0.34567977670118305,
+ "grad_norm": 21341.72265625,
+ "learning_rate": 8.3637654566818e-05,
+ "loss": 0.4448,
+ "step": 67000
+ },
+ {
+ "epoch": 0.3459377466837959,
+ "grad_norm": 20746.919921875,
+ "learning_rate": 8.361061813880919e-05,
+ "loss": 0.4511,
+ "step": 67050
+ },
+ {
+ "epoch": 0.3461957166664087,
+ "grad_norm": 19786.162109375,
+ "learning_rate": 8.358356377010325e-05,
+ "loss": 0.452,
+ "step": 67100
+ },
+ {
+ "epoch": 0.34645368664902154,
+ "grad_norm": 20875.25,
+ "learning_rate": 8.355649147514128e-05,
+ "loss": 0.4491,
+ "step": 67150
+ },
+ {
+ "epoch": 0.34671165663163434,
+ "grad_norm": 22833.728515625,
+ "learning_rate": 8.352940126837394e-05,
+ "loss": 0.4545,
+ "step": 67200
+ },
+ {
+ "epoch": 0.3469696266142472,
+ "grad_norm": 21289.896484375,
+ "learning_rate": 8.350229316426146e-05,
+ "loss": 0.4451,
+ "step": 67250
+ },
+ {
+ "epoch": 0.34722759659686,
+ "grad_norm": 23276.080078125,
+ "learning_rate": 8.347516717727363e-05,
+ "loss": 0.4468,
+ "step": 67300
+ },
+ {
+ "epoch": 0.3474855665794728,
+ "grad_norm": 22568.234375,
+ "learning_rate": 8.344802332188977e-05,
+ "loss": 0.4455,
+ "step": 67350
+ },
+ {
+ "epoch": 0.3477435365620856,
+ "grad_norm": 19527.234375,
+ "learning_rate": 8.342086161259874e-05,
+ "loss": 0.4511,
+ "step": 67400
+ },
+ {
+ "epoch": 0.34800150654469847,
+ "grad_norm": 21764.56640625,
+ "learning_rate": 8.339368206389895e-05,
+ "loss": 0.4481,
+ "step": 67450
+ },
+ {
+ "epoch": 0.34825947652731126,
+ "grad_norm": 21142.33984375,
+ "learning_rate": 8.336648469029829e-05,
+ "loss": 0.4539,
+ "step": 67500
+ },
+ {
+ "epoch": 0.3485174465099241,
+ "grad_norm": 21612.60546875,
+ "learning_rate": 8.333926950631421e-05,
+ "loss": 0.4497,
+ "step": 67550
+ },
+ {
+ "epoch": 0.3487754164925369,
+ "grad_norm": 20772.0390625,
+ "learning_rate": 8.331203652647364e-05,
+ "loss": 0.458,
+ "step": 67600
+ },
+ {
+ "epoch": 0.34903338647514975,
+ "grad_norm": 22197.166015625,
+ "learning_rate": 8.328478576531303e-05,
+ "loss": 0.4499,
+ "step": 67650
+ },
+ {
+ "epoch": 0.34929135645776255,
+ "grad_norm": 20853.865234375,
+ "learning_rate": 8.32575172373783e-05,
+ "loss": 0.4473,
+ "step": 67700
+ },
+ {
+ "epoch": 0.3495493264403754,
+ "grad_norm": 19692.892578125,
+ "learning_rate": 8.323023095722486e-05,
+ "loss": 0.4516,
+ "step": 67750
+ },
+ {
+ "epoch": 0.34980729642298825,
+ "grad_norm": 22032.115234375,
+ "learning_rate": 8.32029269394176e-05,
+ "loss": 0.4452,
+ "step": 67800
+ },
+ {
+ "epoch": 0.35006526640560104,
+ "grad_norm": 23928.783203125,
+ "learning_rate": 8.317560519853089e-05,
+ "loss": 0.4489,
+ "step": 67850
+ },
+ {
+ "epoch": 0.3503232363882139,
+ "grad_norm": 20832.560546875,
+ "learning_rate": 8.314826574914853e-05,
+ "loss": 0.4493,
+ "step": 67900
+ },
+ {
+ "epoch": 0.3505812063708267,
+ "grad_norm": 23453.634765625,
+ "learning_rate": 8.31209086058638e-05,
+ "loss": 0.4487,
+ "step": 67950
+ },
+ {
+ "epoch": 0.35083917635343953,
+ "grad_norm": 23585.826171875,
+ "learning_rate": 8.309353378327938e-05,
+ "loss": 0.4473,
+ "step": 68000
+ },
+ {
+ "epoch": 0.3510971463360523,
+ "grad_norm": 21680.953125,
+ "learning_rate": 8.306614129600745e-05,
+ "loss": 0.4494,
+ "step": 68050
+ },
+ {
+ "epoch": 0.3513551163186652,
+ "grad_norm": 19228.56640625,
+ "learning_rate": 8.303873115866958e-05,
+ "loss": 0.4483,
+ "step": 68100
+ },
+ {
+ "epoch": 0.35161308630127797,
+ "grad_norm": 22056.6328125,
+ "learning_rate": 8.301130338589679e-05,
+ "loss": 0.4485,
+ "step": 68150
+ },
+ {
+ "epoch": 0.3518710562838908,
+ "grad_norm": 22030.484375,
+ "learning_rate": 8.298385799232947e-05,
+ "loss": 0.4462,
+ "step": 68200
+ },
+ {
+ "epoch": 0.3521290262665036,
+ "grad_norm": 19658.33984375,
+ "learning_rate": 8.295639499261745e-05,
+ "loss": 0.4444,
+ "step": 68250
+ },
+ {
+ "epoch": 0.35238699624911646,
+ "grad_norm": 19667.8125,
+ "learning_rate": 8.292891440141997e-05,
+ "loss": 0.4482,
+ "step": 68300
+ },
+ {
+ "epoch": 0.35264496623172925,
+ "grad_norm": 20248.193359375,
+ "learning_rate": 8.290141623340558e-05,
+ "loss": 0.454,
+ "step": 68350
+ },
+ {
+ "epoch": 0.3529029362143421,
+ "grad_norm": 21358.89453125,
+ "learning_rate": 8.287390050325232e-05,
+ "loss": 0.4485,
+ "step": 68400
+ },
+ {
+ "epoch": 0.35316090619695495,
+ "grad_norm": 19209.328125,
+ "learning_rate": 8.284636722564754e-05,
+ "loss": 0.4505,
+ "step": 68450
+ },
+ {
+ "epoch": 0.35341887617956774,
+ "grad_norm": 21890.7109375,
+ "learning_rate": 8.281881641528795e-05,
+ "loss": 0.4531,
+ "step": 68500
+ },
+ {
+ "epoch": 0.3536768461621806,
+ "grad_norm": 20904.052734375,
+ "learning_rate": 8.279124808687967e-05,
+ "loss": 0.4494,
+ "step": 68550
+ },
+ {
+ "epoch": 0.3539348161447934,
+ "grad_norm": 22519.888671875,
+ "learning_rate": 8.276366225513812e-05,
+ "loss": 0.4422,
+ "step": 68600
+ },
+ {
+ "epoch": 0.35419278612740623,
+ "grad_norm": 20027.009765625,
+ "learning_rate": 8.27360589347881e-05,
+ "loss": 0.4484,
+ "step": 68650
+ },
+ {
+ "epoch": 0.354450756110019,
+ "grad_norm": 22069.64453125,
+ "learning_rate": 8.27084381405637e-05,
+ "loss": 0.443,
+ "step": 68700
+ },
+ {
+ "epoch": 0.3547087260926319,
+ "grad_norm": 23096.74609375,
+ "learning_rate": 8.26807998872084e-05,
+ "loss": 0.4437,
+ "step": 68750
+ },
+ {
+ "epoch": 0.35496669607524467,
+ "grad_norm": 19204.626953125,
+ "learning_rate": 8.265314418947494e-05,
+ "loss": 0.4496,
+ "step": 68800
+ },
+ {
+ "epoch": 0.3552246660578575,
+ "grad_norm": 26871.888671875,
+ "learning_rate": 8.262547106212541e-05,
+ "loss": 0.446,
+ "step": 68850
+ },
+ {
+ "epoch": 0.3554826360404703,
+ "grad_norm": 21342.556640625,
+ "learning_rate": 8.259778051993118e-05,
+ "loss": 0.4525,
+ "step": 68900
+ },
+ {
+ "epoch": 0.35574060602308316,
+ "grad_norm": 23054.814453125,
+ "learning_rate": 8.25700725776729e-05,
+ "loss": 0.4427,
+ "step": 68950
+ },
+ {
+ "epoch": 0.35599857600569595,
+ "grad_norm": 20473.818359375,
+ "learning_rate": 8.254234725014061e-05,
+ "loss": 0.4452,
+ "step": 69000
+ },
+ {
+ "epoch": 0.3562565459883088,
+ "grad_norm": 22081.576171875,
+ "learning_rate": 8.251460455213347e-05,
+ "loss": 0.4533,
+ "step": 69050
+ },
+ {
+ "epoch": 0.3565145159709216,
+ "grad_norm": 21840.048828125,
+ "learning_rate": 8.248684449846004e-05,
+ "loss": 0.4503,
+ "step": 69100
+ },
+ {
+ "epoch": 0.35677248595353445,
+ "grad_norm": 21595.234375,
+ "learning_rate": 8.245906710393808e-05,
+ "loss": 0.4459,
+ "step": 69150
+ },
+ {
+ "epoch": 0.3570304559361473,
+ "grad_norm": 22540.302734375,
+ "learning_rate": 8.243127238339463e-05,
+ "loss": 0.4461,
+ "step": 69200
+ },
+ {
+ "epoch": 0.3572884259187601,
+ "grad_norm": 20646.5859375,
+ "learning_rate": 8.2403460351666e-05,
+ "loss": 0.4522,
+ "step": 69250
+ },
+ {
+ "epoch": 0.35754639590137294,
+ "grad_norm": 20219.978515625,
+ "learning_rate": 8.237563102359767e-05,
+ "loss": 0.4464,
+ "step": 69300
+ },
+ {
+ "epoch": 0.35780436588398573,
+ "grad_norm": 21399.888671875,
+ "learning_rate": 8.234778441404441e-05,
+ "loss": 0.451,
+ "step": 69350
+ },
+ {
+ "epoch": 0.3580623358665986,
+ "grad_norm": 23263.193359375,
+ "learning_rate": 8.231992053787024e-05,
+ "loss": 0.4491,
+ "step": 69400
+ },
+ {
+ "epoch": 0.3583203058492114,
+ "grad_norm": 20740.455078125,
+ "learning_rate": 8.229203940994829e-05,
+ "loss": 0.4456,
+ "step": 69450
+ },
+ {
+ "epoch": 0.3585782758318242,
+ "grad_norm": 21715.078125,
+ "learning_rate": 8.226414104516102e-05,
+ "loss": 0.4467,
+ "step": 69500
+ },
+ {
+ "epoch": 0.358836245814437,
+ "grad_norm": 19771.517578125,
+ "learning_rate": 8.223622545840001e-05,
+ "loss": 0.4505,
+ "step": 69550
+ },
+ {
+ "epoch": 0.35909421579704986,
+ "grad_norm": 20944.298828125,
+ "learning_rate": 8.220829266456608e-05,
+ "loss": 0.4481,
+ "step": 69600
+ },
+ {
+ "epoch": 0.35935218577966266,
+ "grad_norm": 22313.017578125,
+ "learning_rate": 8.21803426785692e-05,
+ "loss": 0.4503,
+ "step": 69650
+ },
+ {
+ "epoch": 0.3596101557622755,
+ "grad_norm": 22525.5859375,
+ "learning_rate": 8.215237551532853e-05,
+ "loss": 0.4488,
+ "step": 69700
+ },
+ {
+ "epoch": 0.3598681257448883,
+ "grad_norm": 22731.85546875,
+ "learning_rate": 8.21243911897724e-05,
+ "loss": 0.4476,
+ "step": 69750
+ },
+ {
+ "epoch": 0.36012609572750115,
+ "grad_norm": 20872.9375,
+ "learning_rate": 8.20963897168383e-05,
+ "loss": 0.4485,
+ "step": 69800
+ },
+ {
+ "epoch": 0.360384065710114,
+ "grad_norm": 21066.095703125,
+ "learning_rate": 8.206837111147289e-05,
+ "loss": 0.4511,
+ "step": 69850
+ },
+ {
+ "epoch": 0.3606420356927268,
+ "grad_norm": 21823.62890625,
+ "learning_rate": 8.204033538863197e-05,
+ "loss": 0.4415,
+ "step": 69900
+ },
+ {
+ "epoch": 0.36090000567533964,
+ "grad_norm": 19639.724609375,
+ "learning_rate": 8.201228256328042e-05,
+ "loss": 0.4456,
+ "step": 69950
+ },
+ {
+ "epoch": 0.36115797565795243,
+ "grad_norm": 25321.20703125,
+ "learning_rate": 8.198421265039231e-05,
+ "loss": 0.4506,
+ "step": 70000
+ },
+ {
+ "epoch": 0.36115797565795243,
+ "eval_loss": 0.43597322702407837,
+ "eval_runtime": 3285.9769,
+ "eval_samples_per_second": 943.744,
+ "eval_steps_per_second": 1.843,
+ "step": 70000
+ },
+ {
+ "epoch": 0.3614159456405653,
+ "grad_norm": 19558.943359375,
+ "learning_rate": 8.195612566495084e-05,
+ "loss": 0.4502,
+ "step": 70050
+ },
+ {
+ "epoch": 0.3616739156231781,
+ "grad_norm": 21766.482421875,
+ "learning_rate": 8.192802162194828e-05,
+ "loss": 0.4444,
+ "step": 70100
+ },
+ {
+ "epoch": 0.3619318856057909,
+ "grad_norm": 23117.017578125,
+ "learning_rate": 8.189990053638603e-05,
+ "loss": 0.4476,
+ "step": 70150
+ },
+ {
+ "epoch": 0.3621898555884037,
+ "grad_norm": 19175.60546875,
+ "learning_rate": 8.18717624232746e-05,
+ "loss": 0.4479,
+ "step": 70200
+ },
+ {
+ "epoch": 0.36244782557101657,
+ "grad_norm": 22124.80078125,
+ "learning_rate": 8.184360729763351e-05,
+ "loss": 0.449,
+ "step": 70250
+ },
+ {
+ "epoch": 0.36270579555362936,
+ "grad_norm": 21717.501953125,
+ "learning_rate": 8.181543517449147e-05,
+ "loss": 0.4488,
+ "step": 70300
+ },
+ {
+ "epoch": 0.3629637655362422,
+ "grad_norm": 20235.162109375,
+ "learning_rate": 8.178724606888621e-05,
+ "loss": 0.4496,
+ "step": 70350
+ },
+ {
+ "epoch": 0.363221735518855,
+ "grad_norm": 22513.677734375,
+ "learning_rate": 8.175903999586455e-05,
+ "loss": 0.4463,
+ "step": 70400
+ },
+ {
+ "epoch": 0.36347970550146785,
+ "grad_norm": 21388.1953125,
+ "learning_rate": 8.173081697048228e-05,
+ "loss": 0.4446,
+ "step": 70450
+ },
+ {
+ "epoch": 0.36373767548408065,
+ "grad_norm": 20549.271484375,
+ "learning_rate": 8.170257700780435e-05,
+ "loss": 0.4421,
+ "step": 70500
+ },
+ {
+ "epoch": 0.3639956454666935,
+ "grad_norm": 21219.158203125,
+ "learning_rate": 8.16743201229047e-05,
+ "loss": 0.4472,
+ "step": 70550
+ },
+ {
+ "epoch": 0.36425361544930634,
+ "grad_norm": 20570.34375,
+ "learning_rate": 8.164604633086632e-05,
+ "loss": 0.4487,
+ "step": 70600
+ },
+ {
+ "epoch": 0.36451158543191914,
+ "grad_norm": 17376.671875,
+ "learning_rate": 8.161775564678118e-05,
+ "loss": 0.4413,
+ "step": 70650
+ },
+ {
+ "epoch": 0.364769555414532,
+ "grad_norm": 21676.33984375,
+ "learning_rate": 8.158944808575032e-05,
+ "loss": 0.4433,
+ "step": 70700
+ },
+ {
+ "epoch": 0.3650275253971448,
+ "grad_norm": 21901.001953125,
+ "learning_rate": 8.156112366288378e-05,
+ "loss": 0.4465,
+ "step": 70750
+ },
+ {
+ "epoch": 0.36528549537975763,
+ "grad_norm": 20330.720703125,
+ "learning_rate": 8.153278239330056e-05,
+ "loss": 0.4456,
+ "step": 70800
+ },
+ {
+ "epoch": 0.3655434653623704,
+ "grad_norm": 22179.904296875,
+ "learning_rate": 8.15044242921287e-05,
+ "loss": 0.4465,
+ "step": 70850
+ },
+ {
+ "epoch": 0.3658014353449833,
+ "grad_norm": 21384.66015625,
+ "learning_rate": 8.14760493745052e-05,
+ "loss": 0.4476,
+ "step": 70900
+ },
+ {
+ "epoch": 0.36605940532759607,
+ "grad_norm": 21706.103515625,
+ "learning_rate": 8.144765765557604e-05,
+ "loss": 0.4475,
+ "step": 70950
+ },
+ {
+ "epoch": 0.3663173753102089,
+ "grad_norm": 20332.5,
+ "learning_rate": 8.141924915049617e-05,
+ "loss": 0.449,
+ "step": 71000
+ },
+ {
+ "epoch": 0.3665753452928217,
+ "grad_norm": 22648.640625,
+ "learning_rate": 8.139082387442951e-05,
+ "loss": 0.4566,
+ "step": 71050
+ },
+ {
+ "epoch": 0.36683331527543456,
+ "grad_norm": 21496.291015625,
+ "learning_rate": 8.136238184254892e-05,
+ "loss": 0.4493,
+ "step": 71100
+ },
+ {
+ "epoch": 0.36709128525804735,
+ "grad_norm": 22114.169921875,
+ "learning_rate": 8.133392307003618e-05,
+ "loss": 0.4441,
+ "step": 71150
+ },
+ {
+ "epoch": 0.3673492552406602,
+ "grad_norm": 22476.390625,
+ "learning_rate": 8.130544757208205e-05,
+ "loss": 0.4391,
+ "step": 71200
+ },
+ {
+ "epoch": 0.367607225223273,
+ "grad_norm": 22175.044921875,
+ "learning_rate": 8.127695536388623e-05,
+ "loss": 0.4439,
+ "step": 71250
+ },
+ {
+ "epoch": 0.36786519520588584,
+ "grad_norm": 19715.728515625,
+ "learning_rate": 8.124844646065724e-05,
+ "loss": 0.448,
+ "step": 71300
+ },
+ {
+ "epoch": 0.3681231651884987,
+ "grad_norm": 19609.146484375,
+ "learning_rate": 8.121992087761266e-05,
+ "loss": 0.4476,
+ "step": 71350
+ },
+ {
+ "epoch": 0.3683811351711115,
+ "grad_norm": 21872.12890625,
+ "learning_rate": 8.119137862997883e-05,
+ "loss": 0.4536,
+ "step": 71400
+ },
+ {
+ "epoch": 0.36863910515372433,
+ "grad_norm": 19710.619140625,
+ "learning_rate": 8.116281973299107e-05,
+ "loss": 0.4466,
+ "step": 71450
+ },
+ {
+ "epoch": 0.3688970751363371,
+ "grad_norm": 21783.138671875,
+ "learning_rate": 8.113424420189357e-05,
+ "loss": 0.4422,
+ "step": 71500
+ },
+ {
+ "epoch": 0.36915504511895,
+ "grad_norm": 20527.984375,
+ "learning_rate": 8.110565205193941e-05,
+ "loss": 0.4499,
+ "step": 71550
+ },
+ {
+ "epoch": 0.36941301510156277,
+ "grad_norm": 21693.171875,
+ "learning_rate": 8.10770432983905e-05,
+ "loss": 0.4465,
+ "step": 71600
+ },
+ {
+ "epoch": 0.3696709850841756,
+ "grad_norm": 19817.142578125,
+ "learning_rate": 8.104841795651765e-05,
+ "loss": 0.4471,
+ "step": 71650
+ },
+ {
+ "epoch": 0.3699289550667884,
+ "grad_norm": 20883.767578125,
+ "learning_rate": 8.101977604160052e-05,
+ "loss": 0.4507,
+ "step": 71700
+ },
+ {
+ "epoch": 0.37018692504940126,
+ "grad_norm": 21206.943359375,
+ "learning_rate": 8.099111756892759e-05,
+ "loss": 0.4415,
+ "step": 71750
+ },
+ {
+ "epoch": 0.37044489503201405,
+ "grad_norm": 21431.19140625,
+ "learning_rate": 8.096244255379621e-05,
+ "loss": 0.4542,
+ "step": 71800
+ },
+ {
+ "epoch": 0.3707028650146269,
+ "grad_norm": 23020.34375,
+ "learning_rate": 8.093375101151255e-05,
+ "loss": 0.4481,
+ "step": 71850
+ },
+ {
+ "epoch": 0.3709608349972397,
+ "grad_norm": 20704.1171875,
+ "learning_rate": 8.09050429573916e-05,
+ "loss": 0.4427,
+ "step": 71900
+ },
+ {
+ "epoch": 0.37121880497985255,
+ "grad_norm": 20195.037109375,
+ "learning_rate": 8.087631840675715e-05,
+ "loss": 0.4416,
+ "step": 71950
+ },
+ {
+ "epoch": 0.3714767749624654,
+ "grad_norm": 21187.99609375,
+ "learning_rate": 8.084757737494184e-05,
+ "loss": 0.452,
+ "step": 72000
+ },
+ {
+ "epoch": 0.3717347449450782,
+ "grad_norm": 20694.912109375,
+ "learning_rate": 8.081881987728703e-05,
+ "loss": 0.4416,
+ "step": 72050
+ },
+ {
+ "epoch": 0.37199271492769104,
+ "grad_norm": 23006.939453125,
+ "learning_rate": 8.079004592914297e-05,
+ "loss": 0.4426,
+ "step": 72100
+ },
+ {
+ "epoch": 0.37225068491030383,
+ "grad_norm": 21854.025390625,
+ "learning_rate": 8.076125554586859e-05,
+ "loss": 0.4453,
+ "step": 72150
+ },
+ {
+ "epoch": 0.3725086548929167,
+ "grad_norm": 19155.400390625,
+ "learning_rate": 8.073244874283166e-05,
+ "loss": 0.4539,
+ "step": 72200
+ },
+ {
+ "epoch": 0.3727666248755295,
+ "grad_norm": 22085.5625,
+ "learning_rate": 8.070362553540869e-05,
+ "loss": 0.4474,
+ "step": 72250
+ },
+ {
+ "epoch": 0.3730245948581423,
+ "grad_norm": 21225.626953125,
+ "learning_rate": 8.067478593898495e-05,
+ "loss": 0.4431,
+ "step": 72300
+ },
+ {
+ "epoch": 0.3732825648407551,
+ "grad_norm": 21605.546875,
+ "learning_rate": 8.064592996895446e-05,
+ "loss": 0.4534,
+ "step": 72350
+ },
+ {
+ "epoch": 0.37354053482336796,
+ "grad_norm": 20774.87109375,
+ "learning_rate": 8.061705764071999e-05,
+ "loss": 0.4462,
+ "step": 72400
+ },
+ {
+ "epoch": 0.37379850480598076,
+ "grad_norm": 21871.390625,
+ "learning_rate": 8.0588168969693e-05,
+ "loss": 0.4445,
+ "step": 72450
+ },
+ {
+ "epoch": 0.3740564747885936,
+ "grad_norm": 22102.560546875,
+ "learning_rate": 8.05592639712937e-05,
+ "loss": 0.4478,
+ "step": 72500
+ },
+ {
+ "epoch": 0.3743144447712064,
+ "grad_norm": 21172.283203125,
+ "learning_rate": 8.053034266095105e-05,
+ "loss": 0.4469,
+ "step": 72550
+ },
+ {
+ "epoch": 0.37457241475381925,
+ "grad_norm": 21827.390625,
+ "learning_rate": 8.050140505410268e-05,
+ "loss": 0.4485,
+ "step": 72600
+ },
+ {
+ "epoch": 0.37483038473643204,
+ "grad_norm": 21271.87890625,
+ "learning_rate": 8.047245116619492e-05,
+ "loss": 0.45,
+ "step": 72650
+ },
+ {
+ "epoch": 0.3750883547190449,
+ "grad_norm": 21192.6484375,
+ "learning_rate": 8.04434810126828e-05,
+ "loss": 0.442,
+ "step": 72700
+ },
+ {
+ "epoch": 0.37534632470165774,
+ "grad_norm": 21529.736328125,
+ "learning_rate": 8.041449460903001e-05,
+ "loss": 0.4462,
+ "step": 72750
+ },
+ {
+ "epoch": 0.37560429468427053,
+ "grad_norm": 18609.474609375,
+ "learning_rate": 8.038549197070893e-05,
+ "loss": 0.4436,
+ "step": 72800
+ },
+ {
+ "epoch": 0.3758622646668834,
+ "grad_norm": 21631.82421875,
+ "learning_rate": 8.035647311320062e-05,
+ "loss": 0.4507,
+ "step": 72850
+ },
+ {
+ "epoch": 0.3761202346494962,
+ "grad_norm": 22347.056640625,
+ "learning_rate": 8.03274380519948e-05,
+ "loss": 0.4472,
+ "step": 72900
+ },
+ {
+ "epoch": 0.376378204632109,
+ "grad_norm": 20416.37109375,
+ "learning_rate": 8.029838680258979e-05,
+ "loss": 0.4475,
+ "step": 72950
+ },
+ {
+ "epoch": 0.3766361746147218,
+ "grad_norm": 21952.27734375,
+ "learning_rate": 8.026931938049259e-05,
+ "loss": 0.4449,
+ "step": 73000
+ },
+ {
+ "epoch": 0.37689414459733467,
+ "grad_norm": 23068.12109375,
+ "learning_rate": 8.024023580121885e-05,
+ "loss": 0.4477,
+ "step": 73050
+ },
+ {
+ "epoch": 0.37715211457994746,
+ "grad_norm": 21956.462890625,
+ "learning_rate": 8.021113608029281e-05,
+ "loss": 0.4459,
+ "step": 73100
+ },
+ {
+ "epoch": 0.3774100845625603,
+ "grad_norm": 20933.28125,
+ "learning_rate": 8.018202023324733e-05,
+ "loss": 0.4481,
+ "step": 73150
+ },
+ {
+ "epoch": 0.3776680545451731,
+ "grad_norm": 23138.638671875,
+ "learning_rate": 8.015288827562389e-05,
+ "loss": 0.437,
+ "step": 73200
+ },
+ {
+ "epoch": 0.37792602452778595,
+ "grad_norm": 20973.119140625,
+ "learning_rate": 8.012374022297255e-05,
+ "loss": 0.4454,
+ "step": 73250
+ },
+ {
+ "epoch": 0.37818399451039875,
+ "grad_norm": 21328.29296875,
+ "learning_rate": 8.0094576090852e-05,
+ "loss": 0.4426,
+ "step": 73300
+ },
+ {
+ "epoch": 0.3784419644930116,
+ "grad_norm": 20653.591796875,
+ "learning_rate": 8.006539589482949e-05,
+ "loss": 0.4448,
+ "step": 73350
+ },
+ {
+ "epoch": 0.3786999344756244,
+ "grad_norm": 21520.181640625,
+ "learning_rate": 8.003619965048083e-05,
+ "loss": 0.4428,
+ "step": 73400
+ },
+ {
+ "epoch": 0.37895790445823724,
+ "grad_norm": 20736.89453125,
+ "learning_rate": 8.000698737339041e-05,
+ "loss": 0.4483,
+ "step": 73450
+ },
+ {
+ "epoch": 0.3792158744408501,
+ "grad_norm": 23887.587890625,
+ "learning_rate": 7.997775907915118e-05,
+ "loss": 0.4518,
+ "step": 73500
+ },
+ {
+ "epoch": 0.3794738444234629,
+ "grad_norm": 23771.8671875,
+ "learning_rate": 7.994851478336465e-05,
+ "loss": 0.4479,
+ "step": 73550
+ },
+ {
+ "epoch": 0.37973181440607573,
+ "grad_norm": 21563.27734375,
+ "learning_rate": 7.991925450164084e-05,
+ "loss": 0.4433,
+ "step": 73600
+ },
+ {
+ "epoch": 0.3799897843886885,
+ "grad_norm": 21403.751953125,
+ "learning_rate": 7.988997824959832e-05,
+ "loss": 0.4443,
+ "step": 73650
+ },
+ {
+ "epoch": 0.38024775437130137,
+ "grad_norm": 22136.51171875,
+ "learning_rate": 7.986068604286421e-05,
+ "loss": 0.446,
+ "step": 73700
+ },
+ {
+ "epoch": 0.38050572435391417,
+ "grad_norm": 22143.857421875,
+ "learning_rate": 7.98313778970741e-05,
+ "loss": 0.4416,
+ "step": 73750
+ },
+ {
+ "epoch": 0.380763694336527,
+ "grad_norm": 22035.1171875,
+ "learning_rate": 7.980205382787211e-05,
+ "loss": 0.4413,
+ "step": 73800
+ },
+ {
+ "epoch": 0.3810216643191398,
+ "grad_norm": 21744.25390625,
+ "learning_rate": 7.97727138509109e-05,
+ "loss": 0.4463,
+ "step": 73850
+ },
+ {
+ "epoch": 0.38127963430175266,
+ "grad_norm": 21739.26171875,
+ "learning_rate": 7.974335798185153e-05,
+ "loss": 0.4415,
+ "step": 73900
+ },
+ {
+ "epoch": 0.38153760428436545,
+ "grad_norm": 20974.59765625,
+ "learning_rate": 7.971398623636361e-05,
+ "loss": 0.4457,
+ "step": 73950
+ },
+ {
+ "epoch": 0.3817955742669783,
+ "grad_norm": 19807.79296875,
+ "learning_rate": 7.968459863012523e-05,
+ "loss": 0.4423,
+ "step": 74000
+ },
+ {
+ "epoch": 0.3820535442495911,
+ "grad_norm": 21711.158203125,
+ "learning_rate": 7.96551951788229e-05,
+ "loss": 0.4466,
+ "step": 74050
+ },
+ {
+ "epoch": 0.38231151423220394,
+ "grad_norm": 19187.47265625,
+ "learning_rate": 7.962577589815163e-05,
+ "loss": 0.4387,
+ "step": 74100
+ },
+ {
+ "epoch": 0.3825694842148168,
+ "grad_norm": 19402.611328125,
+ "learning_rate": 7.959634080381486e-05,
+ "loss": 0.444,
+ "step": 74150
+ },
+ {
+ "epoch": 0.3828274541974296,
+ "grad_norm": 21287.9765625,
+ "learning_rate": 7.956688991152445e-05,
+ "loss": 0.4386,
+ "step": 74200
+ },
+ {
+ "epoch": 0.38308542418004243,
+ "grad_norm": 20430.591796875,
+ "learning_rate": 7.953742323700075e-05,
+ "loss": 0.4453,
+ "step": 74250
+ },
+ {
+ "epoch": 0.3833433941626552,
+ "grad_norm": 23246.041015625,
+ "learning_rate": 7.950794079597248e-05,
+ "loss": 0.4448,
+ "step": 74300
+ },
+ {
+ "epoch": 0.3836013641452681,
+ "grad_norm": 23098.74609375,
+ "learning_rate": 7.94784426041768e-05,
+ "loss": 0.4449,
+ "step": 74350
+ },
+ {
+ "epoch": 0.38385933412788087,
+ "grad_norm": 21504.71484375,
+ "learning_rate": 7.944892867735929e-05,
+ "loss": 0.4423,
+ "step": 74400
+ },
+ {
+ "epoch": 0.3841173041104937,
+ "grad_norm": 20115.0859375,
+ "learning_rate": 7.941939903127386e-05,
+ "loss": 0.4462,
+ "step": 74450
+ },
+ {
+ "epoch": 0.3843752740931065,
+ "grad_norm": 20473.681640625,
+ "learning_rate": 7.938985368168293e-05,
+ "loss": 0.4541,
+ "step": 74500
+ },
+ {
+ "epoch": 0.38463324407571936,
+ "grad_norm": 19664.6640625,
+ "learning_rate": 7.93602926443572e-05,
+ "loss": 0.4439,
+ "step": 74550
+ },
+ {
+ "epoch": 0.38489121405833215,
+ "grad_norm": 20806.474609375,
+ "learning_rate": 7.933071593507579e-05,
+ "loss": 0.439,
+ "step": 74600
+ },
+ {
+ "epoch": 0.385149184040945,
+ "grad_norm": 20905.197265625,
+ "learning_rate": 7.930112356962618e-05,
+ "loss": 0.444,
+ "step": 74650
+ },
+ {
+ "epoch": 0.3854071540235578,
+ "grad_norm": 26333.470703125,
+ "learning_rate": 7.927151556380417e-05,
+ "loss": 0.4462,
+ "step": 74700
+ },
+ {
+ "epoch": 0.38566512400617065,
+ "grad_norm": 20478.18359375,
+ "learning_rate": 7.924189193341396e-05,
+ "loss": 0.4456,
+ "step": 74750
+ },
+ {
+ "epoch": 0.38592309398878344,
+ "grad_norm": 20605.662109375,
+ "learning_rate": 7.921225269426808e-05,
+ "loss": 0.4412,
+ "step": 74800
+ },
+ {
+ "epoch": 0.3861810639713963,
+ "grad_norm": 23029.943359375,
+ "learning_rate": 7.918259786218738e-05,
+ "loss": 0.4427,
+ "step": 74850
+ },
+ {
+ "epoch": 0.38643903395400914,
+ "grad_norm": 23275.130859375,
+ "learning_rate": 7.915292745300103e-05,
+ "loss": 0.4436,
+ "step": 74900
+ },
+ {
+ "epoch": 0.38669700393662193,
+ "grad_norm": 22123.671875,
+ "learning_rate": 7.91232414825465e-05,
+ "loss": 0.4456,
+ "step": 74950
+ },
+ {
+ "epoch": 0.3869549739192348,
+ "grad_norm": 22476.365234375,
+ "learning_rate": 7.909353996666961e-05,
+ "loss": 0.4424,
+ "step": 75000
+ },
+ {
+ "epoch": 0.3869549739192348,
+ "eval_loss": 0.43277591466903687,
+ "eval_runtime": 3260.4686,
+ "eval_samples_per_second": 951.127,
+ "eval_steps_per_second": 1.858,
+ "step": 75000
+ },
+ {
+ "epoch": 0.3872129439018476,
+ "grad_norm": 22150.966796875,
+ "learning_rate": 7.906382292122448e-05,
+ "loss": 0.4407,
+ "step": 75050
+ },
+ {
+ "epoch": 0.3874709138844604,
+ "grad_norm": 20100.5625,
+ "learning_rate": 7.903409036207343e-05,
+ "loss": 0.4443,
+ "step": 75100
+ },
+ {
+ "epoch": 0.3877288838670732,
+ "grad_norm": 22078.353515625,
+ "learning_rate": 7.900434230508715e-05,
+ "loss": 0.4468,
+ "step": 75150
+ },
+ {
+ "epoch": 0.38798685384968606,
+ "grad_norm": 20395.498046875,
+ "learning_rate": 7.897457876614461e-05,
+ "loss": 0.4424,
+ "step": 75200
+ },
+ {
+ "epoch": 0.38824482383229886,
+ "grad_norm": 23190.4140625,
+ "learning_rate": 7.894479976113298e-05,
+ "loss": 0.4394,
+ "step": 75250
+ },
+ {
+ "epoch": 0.3885027938149117,
+ "grad_norm": 21523.7265625,
+ "learning_rate": 7.891500530594771e-05,
+ "loss": 0.4441,
+ "step": 75300
+ },
+ {
+ "epoch": 0.3887607637975245,
+ "grad_norm": 22941.23828125,
+ "learning_rate": 7.888519541649253e-05,
+ "loss": 0.443,
+ "step": 75350
+ },
+ {
+ "epoch": 0.38901873378013735,
+ "grad_norm": 21467.90234375,
+ "learning_rate": 7.885537010867936e-05,
+ "loss": 0.4478,
+ "step": 75400
+ },
+ {
+ "epoch": 0.38927670376275014,
+ "grad_norm": 22635.732421875,
+ "learning_rate": 7.882552939842837e-05,
+ "loss": 0.4415,
+ "step": 75450
+ },
+ {
+ "epoch": 0.389534673745363,
+ "grad_norm": 21242.326171875,
+ "learning_rate": 7.879567330166797e-05,
+ "loss": 0.4352,
+ "step": 75500
+ },
+ {
+ "epoch": 0.38979264372797584,
+ "grad_norm": 20005.158203125,
+ "learning_rate": 7.876580183433475e-05,
+ "loss": 0.4393,
+ "step": 75550
+ },
+ {
+ "epoch": 0.39005061371058863,
+ "grad_norm": 23355.044921875,
+ "learning_rate": 7.873591501237351e-05,
+ "loss": 0.4465,
+ "step": 75600
+ },
+ {
+ "epoch": 0.3903085836932015,
+ "grad_norm": 21217.359375,
+ "learning_rate": 7.870601285173731e-05,
+ "loss": 0.4437,
+ "step": 75650
+ },
+ {
+ "epoch": 0.3905665536758143,
+ "grad_norm": 22424.580078125,
+ "learning_rate": 7.867609536838729e-05,
+ "loss": 0.4397,
+ "step": 75700
+ },
+ {
+ "epoch": 0.3908245236584271,
+ "grad_norm": 20943.65234375,
+ "learning_rate": 7.864616257829285e-05,
+ "loss": 0.4427,
+ "step": 75750
+ },
+ {
+ "epoch": 0.3910824936410399,
+ "grad_norm": 23246.5625,
+ "learning_rate": 7.861621449743152e-05,
+ "loss": 0.4479,
+ "step": 75800
+ },
+ {
+ "epoch": 0.39134046362365277,
+ "grad_norm": 21575.830078125,
+ "learning_rate": 7.858625114178902e-05,
+ "loss": 0.4384,
+ "step": 75850
+ },
+ {
+ "epoch": 0.39159843360626556,
+ "grad_norm": 22053.5546875,
+ "learning_rate": 7.855627252735918e-05,
+ "loss": 0.4364,
+ "step": 75900
+ },
+ {
+ "epoch": 0.3918564035888784,
+ "grad_norm": 21934.55078125,
+ "learning_rate": 7.852627867014406e-05,
+ "loss": 0.4466,
+ "step": 75950
+ },
+ {
+ "epoch": 0.3921143735714912,
+ "grad_norm": 20184.078125,
+ "learning_rate": 7.849626958615374e-05,
+ "loss": 0.4422,
+ "step": 76000
+ },
+ {
+ "epoch": 0.39237234355410405,
+ "grad_norm": 21770.923828125,
+ "learning_rate": 7.846624529140652e-05,
+ "loss": 0.4382,
+ "step": 76050
+ },
+ {
+ "epoch": 0.39263031353671685,
+ "grad_norm": 21592.16796875,
+ "learning_rate": 7.843620580192877e-05,
+ "loss": 0.4404,
+ "step": 76100
+ },
+ {
+ "epoch": 0.3928882835193297,
+ "grad_norm": 19634.1875,
+ "learning_rate": 7.8406151133755e-05,
+ "loss": 0.4443,
+ "step": 76150
+ },
+ {
+ "epoch": 0.3931462535019425,
+ "grad_norm": 24045.01171875,
+ "learning_rate": 7.837608130292782e-05,
+ "loss": 0.438,
+ "step": 76200
+ },
+ {
+ "epoch": 0.39340422348455534,
+ "grad_norm": 21739.921875,
+ "learning_rate": 7.83459963254979e-05,
+ "loss": 0.4474,
+ "step": 76250
+ },
+ {
+ "epoch": 0.3936621934671682,
+ "grad_norm": 20915.56640625,
+ "learning_rate": 7.831589621752405e-05,
+ "loss": 0.4463,
+ "step": 76300
+ },
+ {
+ "epoch": 0.393920163449781,
+ "grad_norm": 18799.80078125,
+ "learning_rate": 7.828578099507308e-05,
+ "loss": 0.4401,
+ "step": 76350
+ },
+ {
+ "epoch": 0.39417813343239383,
+ "grad_norm": 19029.51171875,
+ "learning_rate": 7.825565067421995e-05,
+ "loss": 0.4428,
+ "step": 76400
+ },
+ {
+ "epoch": 0.3944361034150066,
+ "grad_norm": 22817.376953125,
+ "learning_rate": 7.822550527104762e-05,
+ "loss": 0.4467,
+ "step": 76450
+ },
+ {
+ "epoch": 0.39469407339761947,
+ "grad_norm": 19165.529296875,
+ "learning_rate": 7.819534480164713e-05,
+ "loss": 0.4365,
+ "step": 76500
+ },
+ {
+ "epoch": 0.39495204338023226,
+ "grad_norm": 22980.056640625,
+ "learning_rate": 7.816516928211756e-05,
+ "loss": 0.4386,
+ "step": 76550
+ },
+ {
+ "epoch": 0.3952100133628451,
+ "grad_norm": 21261.7109375,
+ "learning_rate": 7.813497872856603e-05,
+ "loss": 0.4358,
+ "step": 76600
+ },
+ {
+ "epoch": 0.3954679833454579,
+ "grad_norm": 21533.779296875,
+ "learning_rate": 7.810477315710763e-05,
+ "loss": 0.4444,
+ "step": 76650
+ },
+ {
+ "epoch": 0.39572595332807076,
+ "grad_norm": 20503.556640625,
+ "learning_rate": 7.807455258386556e-05,
+ "loss": 0.4446,
+ "step": 76700
+ },
+ {
+ "epoch": 0.39598392331068355,
+ "grad_norm": 21180.939453125,
+ "learning_rate": 7.804431702497093e-05,
+ "loss": 0.4486,
+ "step": 76750
+ },
+ {
+ "epoch": 0.3962418932932964,
+ "grad_norm": 24126.484375,
+ "learning_rate": 7.801406649656294e-05,
+ "loss": 0.4419,
+ "step": 76800
+ },
+ {
+ "epoch": 0.3964998632759092,
+ "grad_norm": 19791.345703125,
+ "learning_rate": 7.79838010147887e-05,
+ "loss": 0.4499,
+ "step": 76850
+ },
+ {
+ "epoch": 0.39675783325852204,
+ "grad_norm": 21118.822265625,
+ "learning_rate": 7.795352059580334e-05,
+ "loss": 0.4403,
+ "step": 76900
+ },
+ {
+ "epoch": 0.39701580324113483,
+ "grad_norm": 20787.6015625,
+ "learning_rate": 7.792322525577e-05,
+ "loss": 0.4394,
+ "step": 76950
+ },
+ {
+ "epoch": 0.3972737732237477,
+ "grad_norm": 21575.86328125,
+ "learning_rate": 7.789291501085972e-05,
+ "loss": 0.4482,
+ "step": 77000
+ },
+ {
+ "epoch": 0.39753174320636053,
+ "grad_norm": 21271.287109375,
+ "learning_rate": 7.78625898772515e-05,
+ "loss": 0.4413,
+ "step": 77050
+ },
+ {
+ "epoch": 0.3977897131889733,
+ "grad_norm": 21294.7890625,
+ "learning_rate": 7.783224987113235e-05,
+ "loss": 0.4393,
+ "step": 77100
+ },
+ {
+ "epoch": 0.3980476831715862,
+ "grad_norm": 21880.341796875,
+ "learning_rate": 7.780189500869716e-05,
+ "loss": 0.4464,
+ "step": 77150
+ },
+ {
+ "epoch": 0.39830565315419897,
+ "grad_norm": 22501.482421875,
+ "learning_rate": 7.777152530614876e-05,
+ "loss": 0.4384,
+ "step": 77200
+ },
+ {
+ "epoch": 0.3985636231368118,
+ "grad_norm": 20404.89453125,
+ "learning_rate": 7.774114077969792e-05,
+ "loss": 0.4355,
+ "step": 77250
+ },
+ {
+ "epoch": 0.3988215931194246,
+ "grad_norm": 21435.66015625,
+ "learning_rate": 7.77107414455633e-05,
+ "loss": 0.4468,
+ "step": 77300
+ },
+ {
+ "epoch": 0.39907956310203746,
+ "grad_norm": 20239.091796875,
+ "learning_rate": 7.768032731997148e-05,
+ "loss": 0.4453,
+ "step": 77350
+ },
+ {
+ "epoch": 0.39933753308465025,
+ "grad_norm": 19040.37109375,
+ "learning_rate": 7.764989841915694e-05,
+ "loss": 0.4487,
+ "step": 77400
+ },
+ {
+ "epoch": 0.3995955030672631,
+ "grad_norm": 22501.13671875,
+ "learning_rate": 7.761945475936203e-05,
+ "loss": 0.4488,
+ "step": 77450
+ },
+ {
+ "epoch": 0.3998534730498759,
+ "grad_norm": 20773.27734375,
+ "learning_rate": 7.7588996356837e-05,
+ "loss": 0.4384,
+ "step": 77500
+ },
+ {
+ "epoch": 0.40011144303248874,
+ "grad_norm": 22598.4140625,
+ "learning_rate": 7.755852322783994e-05,
+ "loss": 0.4358,
+ "step": 77550
+ },
+ {
+ "epoch": 0.40036941301510154,
+ "grad_norm": 20656.033203125,
+ "learning_rate": 7.752803538863683e-05,
+ "loss": 0.4434,
+ "step": 77600
+ },
+ {
+ "epoch": 0.4006273829977144,
+ "grad_norm": 20882.3125,
+ "learning_rate": 7.749753285550146e-05,
+ "loss": 0.4408,
+ "step": 77650
+ },
+ {
+ "epoch": 0.40088535298032724,
+ "grad_norm": 19519.408203125,
+ "learning_rate": 7.746701564471553e-05,
+ "loss": 0.439,
+ "step": 77700
+ },
+ {
+ "epoch": 0.40114332296294003,
+ "grad_norm": 21141.80859375,
+ "learning_rate": 7.74364837725685e-05,
+ "loss": 0.4422,
+ "step": 77750
+ },
+ {
+ "epoch": 0.4014012929455529,
+ "grad_norm": 21487.45703125,
+ "learning_rate": 7.74059372553577e-05,
+ "loss": 0.429,
+ "step": 77800
+ },
+ {
+ "epoch": 0.4016592629281657,
+ "grad_norm": 19889.447265625,
+ "learning_rate": 7.737537610938829e-05,
+ "loss": 0.4474,
+ "step": 77850
+ },
+ {
+ "epoch": 0.4019172329107785,
+ "grad_norm": 21914.947265625,
+ "learning_rate": 7.73448003509732e-05,
+ "loss": 0.4403,
+ "step": 77900
+ },
+ {
+ "epoch": 0.4021752028933913,
+ "grad_norm": 24025.521484375,
+ "learning_rate": 7.731420999643319e-05,
+ "loss": 0.4432,
+ "step": 77950
+ },
+ {
+ "epoch": 0.40243317287600416,
+ "grad_norm": 19703.50390625,
+ "learning_rate": 7.728360506209679e-05,
+ "loss": 0.443,
+ "step": 78000
+ },
+ {
+ "epoch": 0.40269114285861696,
+ "grad_norm": 21566.37890625,
+ "learning_rate": 7.725298556430034e-05,
+ "loss": 0.448,
+ "step": 78050
+ },
+ {
+ "epoch": 0.4029491128412298,
+ "grad_norm": 21902.564453125,
+ "learning_rate": 7.72223515193879e-05,
+ "loss": 0.438,
+ "step": 78100
+ },
+ {
+ "epoch": 0.4032070828238426,
+ "grad_norm": 20892.7578125,
+ "learning_rate": 7.719170294371136e-05,
+ "loss": 0.4382,
+ "step": 78150
+ },
+ {
+ "epoch": 0.40346505280645545,
+ "grad_norm": 21648.673828125,
+ "learning_rate": 7.716103985363033e-05,
+ "loss": 0.4378,
+ "step": 78200
+ },
+ {
+ "epoch": 0.40372302278906824,
+ "grad_norm": 23124.40625,
+ "learning_rate": 7.713036226551215e-05,
+ "loss": 0.442,
+ "step": 78250
+ },
+ {
+ "epoch": 0.4039809927716811,
+ "grad_norm": 25006.751953125,
+ "learning_rate": 7.709967019573195e-05,
+ "loss": 0.4397,
+ "step": 78300
+ },
+ {
+ "epoch": 0.4042389627542939,
+ "grad_norm": 20722.802734375,
+ "learning_rate": 7.706896366067256e-05,
+ "loss": 0.4388,
+ "step": 78350
+ },
+ {
+ "epoch": 0.40449693273690673,
+ "grad_norm": 20202.013671875,
+ "learning_rate": 7.703824267672452e-05,
+ "loss": 0.4404,
+ "step": 78400
+ },
+ {
+ "epoch": 0.4047549027195196,
+ "grad_norm": 21261.9375,
+ "learning_rate": 7.700750726028609e-05,
+ "loss": 0.4369,
+ "step": 78450
+ },
+ {
+ "epoch": 0.4050128727021324,
+ "grad_norm": 25343.57421875,
+ "learning_rate": 7.69767574277633e-05,
+ "loss": 0.4444,
+ "step": 78500
+ },
+ {
+ "epoch": 0.4052708426847452,
+ "grad_norm": 20222.767578125,
+ "learning_rate": 7.694599319556972e-05,
+ "loss": 0.4425,
+ "step": 78550
+ },
+ {
+ "epoch": 0.405528812667358,
+ "grad_norm": 22934.466796875,
+ "learning_rate": 7.691521458012678e-05,
+ "loss": 0.4411,
+ "step": 78600
+ },
+ {
+ "epoch": 0.40578678264997087,
+ "grad_norm": 22235.30078125,
+ "learning_rate": 7.688442159786346e-05,
+ "loss": 0.4445,
+ "step": 78650
+ },
+ {
+ "epoch": 0.40604475263258366,
+ "grad_norm": 21313.986328125,
+ "learning_rate": 7.68536142652165e-05,
+ "loss": 0.4341,
+ "step": 78700
+ },
+ {
+ "epoch": 0.4063027226151965,
+ "grad_norm": 20130.53515625,
+ "learning_rate": 7.68227925986302e-05,
+ "loss": 0.4395,
+ "step": 78750
+ },
+ {
+ "epoch": 0.4065606925978093,
+ "grad_norm": 19342.740234375,
+ "learning_rate": 7.679195661455664e-05,
+ "loss": 0.4424,
+ "step": 78800
+ },
+ {
+ "epoch": 0.40681866258042215,
+ "grad_norm": 21876.705078125,
+ "learning_rate": 7.676110632945543e-05,
+ "loss": 0.4415,
+ "step": 78850
+ },
+ {
+ "epoch": 0.40707663256303495,
+ "grad_norm": 23199.501953125,
+ "learning_rate": 7.673024175979384e-05,
+ "loss": 0.4423,
+ "step": 78900
+ },
+ {
+ "epoch": 0.4073346025456478,
+ "grad_norm": 22781.091796875,
+ "learning_rate": 7.669936292204683e-05,
+ "loss": 0.4398,
+ "step": 78950
+ },
+ {
+ "epoch": 0.4075925725282606,
+ "grad_norm": 24025.9375,
+ "learning_rate": 7.666846983269688e-05,
+ "loss": 0.4326,
+ "step": 79000
+ },
+ {
+ "epoch": 0.40785054251087344,
+ "grad_norm": 20797.056640625,
+ "learning_rate": 7.663756250823413e-05,
+ "loss": 0.4388,
+ "step": 79050
+ },
+ {
+ "epoch": 0.40810851249348623,
+ "grad_norm": 25106.67578125,
+ "learning_rate": 7.660664096515632e-05,
+ "loss": 0.4385,
+ "step": 79100
+ },
+ {
+ "epoch": 0.4083664824760991,
+ "grad_norm": 22217.36328125,
+ "learning_rate": 7.657570521996877e-05,
+ "loss": 0.4455,
+ "step": 79150
+ },
+ {
+ "epoch": 0.40862445245871193,
+ "grad_norm": 21679.291015625,
+ "learning_rate": 7.654475528918439e-05,
+ "loss": 0.4409,
+ "step": 79200
+ },
+ {
+ "epoch": 0.4088824224413247,
+ "grad_norm": 20133.583984375,
+ "learning_rate": 7.651379118932364e-05,
+ "loss": 0.4391,
+ "step": 79250
+ },
+ {
+ "epoch": 0.40914039242393757,
+ "grad_norm": 23019.171875,
+ "learning_rate": 7.648281293691457e-05,
+ "loss": 0.446,
+ "step": 79300
+ },
+ {
+ "epoch": 0.40939836240655036,
+ "grad_norm": 24098.38671875,
+ "learning_rate": 7.645182054849276e-05,
+ "loss": 0.4417,
+ "step": 79350
+ },
+ {
+ "epoch": 0.4096563323891632,
+ "grad_norm": 23057.240234375,
+ "learning_rate": 7.642081404060136e-05,
+ "loss": 0.4424,
+ "step": 79400
+ },
+ {
+ "epoch": 0.409914302371776,
+ "grad_norm": 20033.328125,
+ "learning_rate": 7.638979342979103e-05,
+ "loss": 0.4386,
+ "step": 79450
+ },
+ {
+ "epoch": 0.41017227235438886,
+ "grad_norm": 20978.68359375,
+ "learning_rate": 7.635875873261995e-05,
+ "loss": 0.4363,
+ "step": 79500
+ },
+ {
+ "epoch": 0.41043024233700165,
+ "grad_norm": 21347.068359375,
+ "learning_rate": 7.63277099656539e-05,
+ "loss": 0.4431,
+ "step": 79550
+ },
+ {
+ "epoch": 0.4106882123196145,
+ "grad_norm": 22031.8125,
+ "learning_rate": 7.629664714546604e-05,
+ "loss": 0.4313,
+ "step": 79600
+ },
+ {
+ "epoch": 0.4109461823022273,
+ "grad_norm": 23963.99609375,
+ "learning_rate": 7.626557028863717e-05,
+ "loss": 0.4363,
+ "step": 79650
+ },
+ {
+ "epoch": 0.41120415228484014,
+ "grad_norm": 20183.259765625,
+ "learning_rate": 7.623447941175548e-05,
+ "loss": 0.4419,
+ "step": 79700
+ },
+ {
+ "epoch": 0.41146212226745293,
+ "grad_norm": 23588.68359375,
+ "learning_rate": 7.620337453141667e-05,
+ "loss": 0.4388,
+ "step": 79750
+ },
+ {
+ "epoch": 0.4117200922500658,
+ "grad_norm": 22210.7265625,
+ "learning_rate": 7.617225566422395e-05,
+ "loss": 0.442,
+ "step": 79800
+ },
+ {
+ "epoch": 0.41197806223267863,
+ "grad_norm": 18647.93359375,
+ "learning_rate": 7.614112282678794e-05,
+ "loss": 0.4349,
+ "step": 79850
+ },
+ {
+ "epoch": 0.4122360322152914,
+ "grad_norm": 20993.388671875,
+ "learning_rate": 7.610997603572675e-05,
+ "loss": 0.4386,
+ "step": 79900
+ },
+ {
+ "epoch": 0.4124940021979043,
+ "grad_norm": 23693.26171875,
+ "learning_rate": 7.607881530766596e-05,
+ "loss": 0.4385,
+ "step": 79950
+ },
+ {
+ "epoch": 0.41275197218051707,
+ "grad_norm": 22608.26953125,
+ "learning_rate": 7.604764065923852e-05,
+ "loss": 0.4415,
+ "step": 80000
+ },
+ {
+ "epoch": 0.41275197218051707,
+ "eval_loss": 0.4290848970413208,
+ "eval_runtime": 3332.9887,
+ "eval_samples_per_second": 930.432,
+ "eval_steps_per_second": 1.817,
+ "step": 80000
+ },
+ {
+ "epoch": 0.4130099421631299,
+ "grad_norm": 23348.44921875,
+ "learning_rate": 7.60164521070849e-05,
+ "loss": 0.4392,
+ "step": 80050
+ },
+ {
+ "epoch": 0.4132679121457427,
+ "grad_norm": 19942.9921875,
+ "learning_rate": 7.598524966785293e-05,
+ "loss": 0.4362,
+ "step": 80100
+ },
+ {
+ "epoch": 0.41352588212835556,
+ "grad_norm": 22776.587890625,
+ "learning_rate": 7.595403335819786e-05,
+ "loss": 0.4402,
+ "step": 80150
+ },
+ {
+ "epoch": 0.41378385211096835,
+ "grad_norm": 22519.923828125,
+ "learning_rate": 7.592280319478233e-05,
+ "loss": 0.4412,
+ "step": 80200
+ },
+ {
+ "epoch": 0.4140418220935812,
+ "grad_norm": 22480.52734375,
+ "learning_rate": 7.589155919427645e-05,
+ "loss": 0.4393,
+ "step": 80250
+ },
+ {
+ "epoch": 0.414299792076194,
+ "grad_norm": 20900.625,
+ "learning_rate": 7.586030137335762e-05,
+ "loss": 0.4344,
+ "step": 80300
+ },
+ {
+ "epoch": 0.41455776205880684,
+ "grad_norm": 21272.306640625,
+ "learning_rate": 7.582902974871069e-05,
+ "loss": 0.4385,
+ "step": 80350
+ },
+ {
+ "epoch": 0.41481573204141964,
+ "grad_norm": 21448.478515625,
+ "learning_rate": 7.57977443370278e-05,
+ "loss": 0.4395,
+ "step": 80400
+ },
+ {
+ "epoch": 0.4150737020240325,
+ "grad_norm": 21854.537109375,
+ "learning_rate": 7.576644515500855e-05,
+ "loss": 0.4411,
+ "step": 80450
+ },
+ {
+ "epoch": 0.4153316720066453,
+ "grad_norm": 21458.689453125,
+ "learning_rate": 7.573513221935979e-05,
+ "loss": 0.4429,
+ "step": 80500
+ },
+ {
+ "epoch": 0.41558964198925813,
+ "grad_norm": 21895.71875,
+ "learning_rate": 7.57038055467958e-05,
+ "loss": 0.4391,
+ "step": 80550
+ },
+ {
+ "epoch": 0.415847611971871,
+ "grad_norm": 23495.921875,
+ "learning_rate": 7.567246515403812e-05,
+ "loss": 0.4398,
+ "step": 80600
+ },
+ {
+ "epoch": 0.41610558195448377,
+ "grad_norm": 26117.8671875,
+ "learning_rate": 7.564111105781568e-05,
+ "loss": 0.4407,
+ "step": 80650
+ },
+ {
+ "epoch": 0.4163635519370966,
+ "grad_norm": 21881.818359375,
+ "learning_rate": 7.560974327486466e-05,
+ "loss": 0.4336,
+ "step": 80700
+ },
+ {
+ "epoch": 0.4166215219197094,
+ "grad_norm": 21309.1015625,
+ "learning_rate": 7.557836182192859e-05,
+ "loss": 0.4371,
+ "step": 80750
+ },
+ {
+ "epoch": 0.41687949190232226,
+ "grad_norm": 21723.498046875,
+ "learning_rate": 7.554696671575826e-05,
+ "loss": 0.4384,
+ "step": 80800
+ },
+ {
+ "epoch": 0.41713746188493506,
+ "grad_norm": 19767.9609375,
+ "learning_rate": 7.55155579731118e-05,
+ "loss": 0.4375,
+ "step": 80850
+ },
+ {
+ "epoch": 0.4173954318675479,
+ "grad_norm": 18992.958984375,
+ "learning_rate": 7.548413561075456e-05,
+ "loss": 0.4419,
+ "step": 80900
+ },
+ {
+ "epoch": 0.4176534018501607,
+ "grad_norm": 21593.255859375,
+ "learning_rate": 7.545269964545921e-05,
+ "loss": 0.4372,
+ "step": 80950
+ },
+ {
+ "epoch": 0.41791137183277355,
+ "grad_norm": 19369.3125,
+ "learning_rate": 7.542125009400565e-05,
+ "loss": 0.4402,
+ "step": 81000
+ },
+ {
+ "epoch": 0.41816934181538634,
+ "grad_norm": 20552.06640625,
+ "learning_rate": 7.538978697318105e-05,
+ "loss": 0.4418,
+ "step": 81050
+ },
+ {
+ "epoch": 0.4184273117979992,
+ "grad_norm": 21554.94140625,
+ "learning_rate": 7.53583102997798e-05,
+ "loss": 0.4406,
+ "step": 81100
+ },
+ {
+ "epoch": 0.418685281780612,
+ "grad_norm": 21098.296875,
+ "learning_rate": 7.532682009060356e-05,
+ "loss": 0.443,
+ "step": 81150
+ },
+ {
+ "epoch": 0.41894325176322483,
+ "grad_norm": 24148.71484375,
+ "learning_rate": 7.529531636246116e-05,
+ "loss": 0.4345,
+ "step": 81200
+ },
+ {
+ "epoch": 0.4192012217458376,
+ "grad_norm": 20404.298828125,
+ "learning_rate": 7.526379913216872e-05,
+ "loss": 0.4335,
+ "step": 81250
+ },
+ {
+ "epoch": 0.4194591917284505,
+ "grad_norm": 22061.607421875,
+ "learning_rate": 7.52322684165495e-05,
+ "loss": 0.4385,
+ "step": 81300
+ },
+ {
+ "epoch": 0.4197171617110633,
+ "grad_norm": 18455.380859375,
+ "learning_rate": 7.520072423243398e-05,
+ "loss": 0.4337,
+ "step": 81350
+ },
+ {
+ "epoch": 0.4199751316936761,
+ "grad_norm": 23344.2734375,
+ "learning_rate": 7.516916659665987e-05,
+ "loss": 0.4401,
+ "step": 81400
+ },
+ {
+ "epoch": 0.42023310167628897,
+ "grad_norm": 20872.77734375,
+ "learning_rate": 7.5137595526072e-05,
+ "loss": 0.4394,
+ "step": 81450
+ },
+ {
+ "epoch": 0.42049107165890176,
+ "grad_norm": 21003.841796875,
+ "learning_rate": 7.51060110375224e-05,
+ "loss": 0.4402,
+ "step": 81500
+ },
+ {
+ "epoch": 0.4207490416415146,
+ "grad_norm": 22772.330078125,
+ "learning_rate": 7.507441314787025e-05,
+ "loss": 0.4438,
+ "step": 81550
+ },
+ {
+ "epoch": 0.4210070116241274,
+ "grad_norm": 19593.216796875,
+ "learning_rate": 7.504280187398189e-05,
+ "loss": 0.4375,
+ "step": 81600
+ },
+ {
+ "epoch": 0.42126498160674025,
+ "grad_norm": 20914.66796875,
+ "learning_rate": 7.501117723273084e-05,
+ "loss": 0.4397,
+ "step": 81650
+ },
+ {
+ "epoch": 0.42152295158935305,
+ "grad_norm": 20479.12109375,
+ "learning_rate": 7.497953924099768e-05,
+ "loss": 0.4365,
+ "step": 81700
+ },
+ {
+ "epoch": 0.4217809215719659,
+ "grad_norm": 20309.25,
+ "learning_rate": 7.494788791567017e-05,
+ "loss": 0.4461,
+ "step": 81750
+ },
+ {
+ "epoch": 0.4220388915545787,
+ "grad_norm": 21467.72265625,
+ "learning_rate": 7.491622327364318e-05,
+ "loss": 0.4354,
+ "step": 81800
+ },
+ {
+ "epoch": 0.42229686153719154,
+ "grad_norm": 20826.80859375,
+ "learning_rate": 7.488454533181871e-05,
+ "loss": 0.4398,
+ "step": 81850
+ },
+ {
+ "epoch": 0.42255483151980433,
+ "grad_norm": 20537.826171875,
+ "learning_rate": 7.485285410710577e-05,
+ "loss": 0.4443,
+ "step": 81900
+ },
+ {
+ "epoch": 0.4228128015024172,
+ "grad_norm": 19521.810546875,
+ "learning_rate": 7.482114961642057e-05,
+ "loss": 0.4379,
+ "step": 81950
+ },
+ {
+ "epoch": 0.42307077148503003,
+ "grad_norm": 19407.5234375,
+ "learning_rate": 7.478943187668633e-05,
+ "loss": 0.4429,
+ "step": 82000
+ },
+ {
+ "epoch": 0.4233287414676428,
+ "grad_norm": 23058.337890625,
+ "learning_rate": 7.475770090483338e-05,
+ "loss": 0.4362,
+ "step": 82050
+ },
+ {
+ "epoch": 0.42358671145025567,
+ "grad_norm": 27362.29296875,
+ "learning_rate": 7.472595671779907e-05,
+ "loss": 0.4413,
+ "step": 82100
+ },
+ {
+ "epoch": 0.42384468143286846,
+ "grad_norm": 20389.08203125,
+ "learning_rate": 7.469419933252789e-05,
+ "loss": 0.4386,
+ "step": 82150
+ },
+ {
+ "epoch": 0.4241026514154813,
+ "grad_norm": 21554.896484375,
+ "learning_rate": 7.466242876597125e-05,
+ "loss": 0.4387,
+ "step": 82200
+ },
+ {
+ "epoch": 0.4243606213980941,
+ "grad_norm": 23449.822265625,
+ "learning_rate": 7.463064503508772e-05,
+ "loss": 0.4402,
+ "step": 82250
+ },
+ {
+ "epoch": 0.42461859138070696,
+ "grad_norm": 23945.1328125,
+ "learning_rate": 7.459884815684279e-05,
+ "loss": 0.4393,
+ "step": 82300
+ },
+ {
+ "epoch": 0.42487656136331975,
+ "grad_norm": 21705.064453125,
+ "learning_rate": 7.456703814820904e-05,
+ "loss": 0.4374,
+ "step": 82350
+ },
+ {
+ "epoch": 0.4251345313459326,
+ "grad_norm": 20050.66796875,
+ "learning_rate": 7.453521502616607e-05,
+ "loss": 0.4433,
+ "step": 82400
+ },
+ {
+ "epoch": 0.4253925013285454,
+ "grad_norm": 24757.845703125,
+ "learning_rate": 7.45033788077004e-05,
+ "loss": 0.4362,
+ "step": 82450
+ },
+ {
+ "epoch": 0.42565047131115824,
+ "grad_norm": 21754.42578125,
+ "learning_rate": 7.44715295098056e-05,
+ "loss": 0.4386,
+ "step": 82500
+ },
+ {
+ "epoch": 0.42590844129377103,
+ "grad_norm": 22891.12890625,
+ "learning_rate": 7.443966714948222e-05,
+ "loss": 0.4438,
+ "step": 82550
+ },
+ {
+ "epoch": 0.4261664112763839,
+ "grad_norm": 22174.580078125,
+ "learning_rate": 7.440779174373776e-05,
+ "loss": 0.4388,
+ "step": 82600
+ },
+ {
+ "epoch": 0.4264243812589967,
+ "grad_norm": 20407.677734375,
+ "learning_rate": 7.43759033095867e-05,
+ "loss": 0.4412,
+ "step": 82650
+ },
+ {
+ "epoch": 0.4266823512416095,
+ "grad_norm": 21960.552734375,
+ "learning_rate": 7.434400186405045e-05,
+ "loss": 0.4394,
+ "step": 82700
+ },
+ {
+ "epoch": 0.4269403212242224,
+ "grad_norm": 20736.583984375,
+ "learning_rate": 7.431208742415741e-05,
+ "loss": 0.4382,
+ "step": 82750
+ },
+ {
+ "epoch": 0.42719829120683517,
+ "grad_norm": 21133.63671875,
+ "learning_rate": 7.428016000694286e-05,
+ "loss": 0.4379,
+ "step": 82800
+ },
+ {
+ "epoch": 0.427456261189448,
+ "grad_norm": 23741.525390625,
+ "learning_rate": 7.424821962944908e-05,
+ "loss": 0.4398,
+ "step": 82850
+ },
+ {
+ "epoch": 0.4277142311720608,
+ "grad_norm": 21936.802734375,
+ "learning_rate": 7.42162663087252e-05,
+ "loss": 0.4383,
+ "step": 82900
+ },
+ {
+ "epoch": 0.42797220115467366,
+ "grad_norm": 24459.85546875,
+ "learning_rate": 7.418430006182727e-05,
+ "loss": 0.4393,
+ "step": 82950
+ },
+ {
+ "epoch": 0.42823017113728645,
+ "grad_norm": 21729.9921875,
+ "learning_rate": 7.415232090581828e-05,
+ "loss": 0.4421,
+ "step": 83000
+ },
+ {
+ "epoch": 0.4284881411198993,
+ "grad_norm": 21081.5703125,
+ "learning_rate": 7.412032885776807e-05,
+ "loss": 0.4414,
+ "step": 83050
+ },
+ {
+ "epoch": 0.4287461111025121,
+ "grad_norm": 20296.740234375,
+ "learning_rate": 7.408832393475338e-05,
+ "loss": 0.4316,
+ "step": 83100
+ },
+ {
+ "epoch": 0.42900408108512494,
+ "grad_norm": 20874.30078125,
+ "learning_rate": 7.405630615385781e-05,
+ "loss": 0.433,
+ "step": 83150
+ },
+ {
+ "epoch": 0.42926205106773774,
+ "grad_norm": 20673.11328125,
+ "learning_rate": 7.402427553217183e-05,
+ "loss": 0.4386,
+ "step": 83200
+ },
+ {
+ "epoch": 0.4295200210503506,
+ "grad_norm": 22462.07421875,
+ "learning_rate": 7.39922320867928e-05,
+ "loss": 0.4464,
+ "step": 83250
+ },
+ {
+ "epoch": 0.4297779910329634,
+ "grad_norm": 20411.771484375,
+ "learning_rate": 7.396017583482487e-05,
+ "loss": 0.444,
+ "step": 83300
+ },
+ {
+ "epoch": 0.43003596101557623,
+ "grad_norm": 21137.6953125,
+ "learning_rate": 7.392810679337902e-05,
+ "loss": 0.4416,
+ "step": 83350
+ },
+ {
+ "epoch": 0.4302939309981891,
+ "grad_norm": 23059.064453125,
+ "learning_rate": 7.38960249795731e-05,
+ "loss": 0.4401,
+ "step": 83400
+ },
+ {
+ "epoch": 0.43055190098080187,
+ "grad_norm": 20305.22265625,
+ "learning_rate": 7.386393041053176e-05,
+ "loss": 0.4399,
+ "step": 83450
+ },
+ {
+ "epoch": 0.4308098709634147,
+ "grad_norm": 22247.779296875,
+ "learning_rate": 7.38318231033865e-05,
+ "loss": 0.4362,
+ "step": 83500
+ },
+ {
+ "epoch": 0.4310678409460275,
+ "grad_norm": 22231.337890625,
+ "learning_rate": 7.379970307527552e-05,
+ "loss": 0.4417,
+ "step": 83550
+ },
+ {
+ "epoch": 0.43132581092864036,
+ "grad_norm": 21788.875,
+ "learning_rate": 7.376757034334388e-05,
+ "loss": 0.4374,
+ "step": 83600
+ },
+ {
+ "epoch": 0.43158378091125316,
+ "grad_norm": 22237.51953125,
+ "learning_rate": 7.373542492474343e-05,
+ "loss": 0.4372,
+ "step": 83650
+ },
+ {
+ "epoch": 0.431841750893866,
+ "grad_norm": 21732.943359375,
+ "learning_rate": 7.370326683663278e-05,
+ "loss": 0.4395,
+ "step": 83700
+ },
+ {
+ "epoch": 0.4320997208764788,
+ "grad_norm": 19517.212890625,
+ "learning_rate": 7.367109609617729e-05,
+ "loss": 0.4371,
+ "step": 83750
+ },
+ {
+ "epoch": 0.43235769085909165,
+ "grad_norm": 23681.388671875,
+ "learning_rate": 7.363891272054903e-05,
+ "loss": 0.4383,
+ "step": 83800
+ },
+ {
+ "epoch": 0.43261566084170444,
+ "grad_norm": 23889.822265625,
+ "learning_rate": 7.360671672692691e-05,
+ "loss": 0.441,
+ "step": 83850
+ },
+ {
+ "epoch": 0.4328736308243173,
+ "grad_norm": 21159.45703125,
+ "learning_rate": 7.357450813249654e-05,
+ "loss": 0.4328,
+ "step": 83900
+ },
+ {
+ "epoch": 0.4331316008069301,
+ "grad_norm": 20617.83984375,
+ "learning_rate": 7.354228695445023e-05,
+ "loss": 0.4395,
+ "step": 83950
+ },
+ {
+ "epoch": 0.43338957078954293,
+ "grad_norm": 19741.568359375,
+ "learning_rate": 7.351005320998699e-05,
+ "loss": 0.4356,
+ "step": 84000
+ },
+ {
+ "epoch": 0.4336475407721557,
+ "grad_norm": 21407.771484375,
+ "learning_rate": 7.347780691631259e-05,
+ "loss": 0.4322,
+ "step": 84050
+ },
+ {
+ "epoch": 0.4339055107547686,
+ "grad_norm": 22396.5625,
+ "learning_rate": 7.344554809063947e-05,
+ "loss": 0.4379,
+ "step": 84100
+ },
+ {
+ "epoch": 0.4341634807373814,
+ "grad_norm": 23536.361328125,
+ "learning_rate": 7.34132767501868e-05,
+ "loss": 0.4372,
+ "step": 84150
+ },
+ {
+ "epoch": 0.4344214507199942,
+ "grad_norm": 23622.90234375,
+ "learning_rate": 7.338099291218036e-05,
+ "loss": 0.4361,
+ "step": 84200
+ },
+ {
+ "epoch": 0.43467942070260707,
+ "grad_norm": 24463.931640625,
+ "learning_rate": 7.334869659385264e-05,
+ "loss": 0.4478,
+ "step": 84250
+ },
+ {
+ "epoch": 0.43493739068521986,
+ "grad_norm": 21666.328125,
+ "learning_rate": 7.331638781244283e-05,
+ "loss": 0.4387,
+ "step": 84300
+ },
+ {
+ "epoch": 0.4351953606678327,
+ "grad_norm": 21145.6875,
+ "learning_rate": 7.328406658519669e-05,
+ "loss": 0.4362,
+ "step": 84350
+ },
+ {
+ "epoch": 0.4354533306504455,
+ "grad_norm": 21766.228515625,
+ "learning_rate": 7.325173292936667e-05,
+ "loss": 0.4433,
+ "step": 84400
+ },
+ {
+ "epoch": 0.43571130063305835,
+ "grad_norm": 23118.056640625,
+ "learning_rate": 7.321938686221185e-05,
+ "loss": 0.4317,
+ "step": 84450
+ },
+ {
+ "epoch": 0.43596927061567115,
+ "grad_norm": 20925.833984375,
+ "learning_rate": 7.318702840099793e-05,
+ "loss": 0.4348,
+ "step": 84500
+ },
+ {
+ "epoch": 0.436227240598284,
+ "grad_norm": 21725.630859375,
+ "learning_rate": 7.315465756299727e-05,
+ "loss": 0.4363,
+ "step": 84550
+ },
+ {
+ "epoch": 0.4364852105808968,
+ "grad_norm": 20223.537109375,
+ "learning_rate": 7.312227436548875e-05,
+ "loss": 0.4363,
+ "step": 84600
+ },
+ {
+ "epoch": 0.43674318056350964,
+ "grad_norm": 22766.71484375,
+ "learning_rate": 7.308987882575793e-05,
+ "loss": 0.442,
+ "step": 84650
+ },
+ {
+ "epoch": 0.43700115054612243,
+ "grad_norm": 20453.341796875,
+ "learning_rate": 7.305747096109688e-05,
+ "loss": 0.4362,
+ "step": 84700
+ },
+ {
+ "epoch": 0.4372591205287353,
+ "grad_norm": 20761.466796875,
+ "learning_rate": 7.302505078880431e-05,
+ "loss": 0.435,
+ "step": 84750
+ },
+ {
+ "epoch": 0.4375170905113481,
+ "grad_norm": 20815.27734375,
+ "learning_rate": 7.299261832618551e-05,
+ "loss": 0.4398,
+ "step": 84800
+ },
+ {
+ "epoch": 0.4377750604939609,
+ "grad_norm": 22528.06640625,
+ "learning_rate": 7.296017359055224e-05,
+ "loss": 0.44,
+ "step": 84850
+ },
+ {
+ "epoch": 0.43803303047657377,
+ "grad_norm": 21391.71484375,
+ "learning_rate": 7.292771659922293e-05,
+ "loss": 0.4376,
+ "step": 84900
+ },
+ {
+ "epoch": 0.43829100045918656,
+ "grad_norm": 21485.966796875,
+ "learning_rate": 7.289524736952245e-05,
+ "loss": 0.4424,
+ "step": 84950
+ },
+ {
+ "epoch": 0.4385489704417994,
+ "grad_norm": 21160.314453125,
+ "learning_rate": 7.286276591878228e-05,
+ "loss": 0.4473,
+ "step": 85000
+ },
+ {
+ "epoch": 0.4385489704417994,
+ "eval_loss": 0.4252757728099823,
+ "eval_runtime": 3252.991,
+ "eval_samples_per_second": 953.313,
+ "eval_steps_per_second": 1.862,
+ "step": 85000
+ },
+ {
+ "epoch": 0.4388069404244122,
+ "grad_norm": 29667.109375,
+ "learning_rate": 7.283027226434036e-05,
+ "loss": 0.4414,
+ "step": 85050
+ },
+ {
+ "epoch": 0.43906491040702506,
+ "grad_norm": 24990.86328125,
+ "learning_rate": 7.27977664235412e-05,
+ "loss": 0.4321,
+ "step": 85100
+ },
+ {
+ "epoch": 0.43932288038963785,
+ "grad_norm": 21708.86328125,
+ "learning_rate": 7.276524841373576e-05,
+ "loss": 0.4331,
+ "step": 85150
+ },
+ {
+ "epoch": 0.4395808503722507,
+ "grad_norm": 22323.1015625,
+ "learning_rate": 7.273271825228157e-05,
+ "loss": 0.4372,
+ "step": 85200
+ },
+ {
+ "epoch": 0.4398388203548635,
+ "grad_norm": 21696.2734375,
+ "learning_rate": 7.270017595654255e-05,
+ "loss": 0.4271,
+ "step": 85250
+ },
+ {
+ "epoch": 0.44009679033747634,
+ "grad_norm": 23364.560546875,
+ "learning_rate": 7.266762154388917e-05,
+ "loss": 0.4327,
+ "step": 85300
+ },
+ {
+ "epoch": 0.44035476032008913,
+ "grad_norm": 21834.607421875,
+ "learning_rate": 7.263505503169834e-05,
+ "loss": 0.4337,
+ "step": 85350
+ },
+ {
+ "epoch": 0.440612730302702,
+ "grad_norm": 18636.244140625,
+ "learning_rate": 7.260247643735343e-05,
+ "loss": 0.4393,
+ "step": 85400
+ },
+ {
+ "epoch": 0.4408707002853148,
+ "grad_norm": 20385.875,
+ "learning_rate": 7.256988577824427e-05,
+ "loss": 0.4398,
+ "step": 85450
+ },
+ {
+ "epoch": 0.4411286702679276,
+ "grad_norm": 21459.576171875,
+ "learning_rate": 7.253728307176713e-05,
+ "loss": 0.435,
+ "step": 85500
+ },
+ {
+ "epoch": 0.4413866402505405,
+ "grad_norm": 22838.716796875,
+ "learning_rate": 7.25046683353247e-05,
+ "loss": 0.4368,
+ "step": 85550
+ },
+ {
+ "epoch": 0.44164461023315327,
+ "grad_norm": 23016.4140625,
+ "learning_rate": 7.247204158632608e-05,
+ "loss": 0.4353,
+ "step": 85600
+ },
+ {
+ "epoch": 0.4419025802157661,
+ "grad_norm": 22318.193359375,
+ "learning_rate": 7.243940284218682e-05,
+ "loss": 0.4374,
+ "step": 85650
+ },
+ {
+ "epoch": 0.4421605501983789,
+ "grad_norm": 20475.376953125,
+ "learning_rate": 7.240675212032884e-05,
+ "loss": 0.4339,
+ "step": 85700
+ },
+ {
+ "epoch": 0.44241852018099176,
+ "grad_norm": 22276.287109375,
+ "learning_rate": 7.237408943818042e-05,
+ "loss": 0.4275,
+ "step": 85750
+ },
+ {
+ "epoch": 0.44267649016360455,
+ "grad_norm": 22131.654296875,
+ "learning_rate": 7.234141481317634e-05,
+ "loss": 0.4373,
+ "step": 85800
+ },
+ {
+ "epoch": 0.4429344601462174,
+ "grad_norm": 24779.14453125,
+ "learning_rate": 7.230872826275765e-05,
+ "loss": 0.4347,
+ "step": 85850
+ },
+ {
+ "epoch": 0.4431924301288302,
+ "grad_norm": 22474.443359375,
+ "learning_rate": 7.227602980437179e-05,
+ "loss": 0.4341,
+ "step": 85900
+ },
+ {
+ "epoch": 0.44345040011144304,
+ "grad_norm": 21620.056640625,
+ "learning_rate": 7.224331945547258e-05,
+ "loss": 0.4399,
+ "step": 85950
+ },
+ {
+ "epoch": 0.44370837009405584,
+ "grad_norm": 21546.8046875,
+ "learning_rate": 7.221059723352014e-05,
+ "loss": 0.4437,
+ "step": 86000
+ },
+ {
+ "epoch": 0.4439663400766687,
+ "grad_norm": 22283.0078125,
+ "learning_rate": 7.2177863155981e-05,
+ "loss": 0.4403,
+ "step": 86050
+ },
+ {
+ "epoch": 0.4442243100592815,
+ "grad_norm": 21332.576171875,
+ "learning_rate": 7.214511724032795e-05,
+ "loss": 0.4369,
+ "step": 86100
+ },
+ {
+ "epoch": 0.44448228004189433,
+ "grad_norm": 23106.01953125,
+ "learning_rate": 7.211235950404013e-05,
+ "loss": 0.4369,
+ "step": 86150
+ },
+ {
+ "epoch": 0.4447402500245071,
+ "grad_norm": 21826.2734375,
+ "learning_rate": 7.207958996460298e-05,
+ "loss": 0.4407,
+ "step": 86200
+ },
+ {
+ "epoch": 0.44499822000711997,
+ "grad_norm": 22308.90625,
+ "learning_rate": 7.204680863950825e-05,
+ "loss": 0.4349,
+ "step": 86250
+ },
+ {
+ "epoch": 0.4452561899897328,
+ "grad_norm": 24916.359375,
+ "learning_rate": 7.2014015546254e-05,
+ "loss": 0.436,
+ "step": 86300
+ },
+ {
+ "epoch": 0.4455141599723456,
+ "grad_norm": 22585.77734375,
+ "learning_rate": 7.198121070234453e-05,
+ "loss": 0.4311,
+ "step": 86350
+ },
+ {
+ "epoch": 0.44577212995495846,
+ "grad_norm": 22984.658203125,
+ "learning_rate": 7.194839412529042e-05,
+ "loss": 0.4324,
+ "step": 86400
+ },
+ {
+ "epoch": 0.44603009993757126,
+ "grad_norm": 22495.552734375,
+ "learning_rate": 7.191556583260853e-05,
+ "loss": 0.4306,
+ "step": 86450
+ },
+ {
+ "epoch": 0.4462880699201841,
+ "grad_norm": 21413.2578125,
+ "learning_rate": 7.188272584182196e-05,
+ "loss": 0.4404,
+ "step": 86500
+ },
+ {
+ "epoch": 0.4465460399027969,
+ "grad_norm": 23719.43359375,
+ "learning_rate": 7.184987417046007e-05,
+ "loss": 0.4321,
+ "step": 86550
+ },
+ {
+ "epoch": 0.44680400988540975,
+ "grad_norm": 22586.095703125,
+ "learning_rate": 7.181701083605846e-05,
+ "loss": 0.4349,
+ "step": 86600
+ },
+ {
+ "epoch": 0.44706197986802254,
+ "grad_norm": 20580.166015625,
+ "learning_rate": 7.178413585615891e-05,
+ "loss": 0.4323,
+ "step": 86650
+ },
+ {
+ "epoch": 0.4473199498506354,
+ "grad_norm": 21345.71875,
+ "learning_rate": 7.175124924830948e-05,
+ "loss": 0.4326,
+ "step": 86700
+ },
+ {
+ "epoch": 0.4475779198332482,
+ "grad_norm": 20615.333984375,
+ "learning_rate": 7.171835103006438e-05,
+ "loss": 0.4425,
+ "step": 86750
+ },
+ {
+ "epoch": 0.44783588981586103,
+ "grad_norm": 25518.546875,
+ "learning_rate": 7.168544121898407e-05,
+ "loss": 0.4307,
+ "step": 86800
+ },
+ {
+ "epoch": 0.4480938597984738,
+ "grad_norm": 23149.703125,
+ "learning_rate": 7.165251983263512e-05,
+ "loss": 0.4336,
+ "step": 86850
+ },
+ {
+ "epoch": 0.4483518297810867,
+ "grad_norm": 22026.19140625,
+ "learning_rate": 7.16195868885904e-05,
+ "loss": 0.4401,
+ "step": 86900
+ },
+ {
+ "epoch": 0.44860979976369947,
+ "grad_norm": 21140.90234375,
+ "learning_rate": 7.158664240442881e-05,
+ "loss": 0.436,
+ "step": 86950
+ },
+ {
+ "epoch": 0.4488677697463123,
+ "grad_norm": 25489.1796875,
+ "learning_rate": 7.155368639773552e-05,
+ "loss": 0.4379,
+ "step": 87000
+ },
+ {
+ "epoch": 0.44912573972892517,
+ "grad_norm": 21035.275390625,
+ "learning_rate": 7.152071888610176e-05,
+ "loss": 0.433,
+ "step": 87050
+ },
+ {
+ "epoch": 0.44938370971153796,
+ "grad_norm": 25905.03515625,
+ "learning_rate": 7.148773988712503e-05,
+ "loss": 0.4423,
+ "step": 87100
+ },
+ {
+ "epoch": 0.4496416796941508,
+ "grad_norm": 21237.857421875,
+ "learning_rate": 7.14547494184088e-05,
+ "loss": 0.4346,
+ "step": 87150
+ },
+ {
+ "epoch": 0.4498996496767636,
+ "grad_norm": 19255.748046875,
+ "learning_rate": 7.14217474975628e-05,
+ "loss": 0.4333,
+ "step": 87200
+ },
+ {
+ "epoch": 0.45015761965937645,
+ "grad_norm": 22115.05078125,
+ "learning_rate": 7.138873414220277e-05,
+ "loss": 0.4371,
+ "step": 87250
+ },
+ {
+ "epoch": 0.45041558964198924,
+ "grad_norm": 23271.462890625,
+ "learning_rate": 7.135570936995064e-05,
+ "loss": 0.4362,
+ "step": 87300
+ },
+ {
+ "epoch": 0.4506735596246021,
+ "grad_norm": 24245.02734375,
+ "learning_rate": 7.132267319843438e-05,
+ "loss": 0.4371,
+ "step": 87350
+ },
+ {
+ "epoch": 0.4509315296072149,
+ "grad_norm": 22234.224609375,
+ "learning_rate": 7.128962564528805e-05,
+ "loss": 0.4306,
+ "step": 87400
+ },
+ {
+ "epoch": 0.45118949958982774,
+ "grad_norm": 22704.115234375,
+ "learning_rate": 7.12565667281518e-05,
+ "loss": 0.4408,
+ "step": 87450
+ },
+ {
+ "epoch": 0.45144746957244053,
+ "grad_norm": 21906.650390625,
+ "learning_rate": 7.122349646467183e-05,
+ "loss": 0.4322,
+ "step": 87500
+ },
+ {
+ "epoch": 0.4517054395550534,
+ "grad_norm": 21960.501953125,
+ "learning_rate": 7.119041487250045e-05,
+ "loss": 0.4322,
+ "step": 87550
+ },
+ {
+ "epoch": 0.45196340953766617,
+ "grad_norm": 20264.14453125,
+ "learning_rate": 7.11573219692959e-05,
+ "loss": 0.4403,
+ "step": 87600
+ },
+ {
+ "epoch": 0.452221379520279,
+ "grad_norm": 20237.078125,
+ "learning_rate": 7.112421777272259e-05,
+ "loss": 0.4421,
+ "step": 87650
+ },
+ {
+ "epoch": 0.45247934950289187,
+ "grad_norm": 22111.3203125,
+ "learning_rate": 7.109110230045087e-05,
+ "loss": 0.4386,
+ "step": 87700
+ },
+ {
+ "epoch": 0.45273731948550466,
+ "grad_norm": 20690.015625,
+ "learning_rate": 7.105797557015715e-05,
+ "loss": 0.4315,
+ "step": 87750
+ },
+ {
+ "epoch": 0.4529952894681175,
+ "grad_norm": 23273.888671875,
+ "learning_rate": 7.102483759952384e-05,
+ "loss": 0.4397,
+ "step": 87800
+ },
+ {
+ "epoch": 0.4532532594507303,
+ "grad_norm": 20268.541015625,
+ "learning_rate": 7.099168840623935e-05,
+ "loss": 0.4381,
+ "step": 87850
+ },
+ {
+ "epoch": 0.45351122943334315,
+ "grad_norm": 21591.724609375,
+ "learning_rate": 7.095852800799806e-05,
+ "loss": 0.4368,
+ "step": 87900
+ },
+ {
+ "epoch": 0.45376919941595595,
+ "grad_norm": 20683.994140625,
+ "learning_rate": 7.092535642250035e-05,
+ "loss": 0.4315,
+ "step": 87950
+ },
+ {
+ "epoch": 0.4540271693985688,
+ "grad_norm": 22910.26953125,
+ "learning_rate": 7.089217366745258e-05,
+ "loss": 0.4415,
+ "step": 88000
+ },
+ {
+ "epoch": 0.4542851393811816,
+ "grad_norm": 22321.40234375,
+ "learning_rate": 7.085897976056706e-05,
+ "loss": 0.4386,
+ "step": 88050
+ },
+ {
+ "epoch": 0.45454310936379444,
+ "grad_norm": 20730.521484375,
+ "learning_rate": 7.082577471956206e-05,
+ "loss": 0.4335,
+ "step": 88100
+ },
+ {
+ "epoch": 0.45480107934640723,
+ "grad_norm": 23302.033203125,
+ "learning_rate": 7.079255856216177e-05,
+ "loss": 0.4366,
+ "step": 88150
+ },
+ {
+ "epoch": 0.4550590493290201,
+ "grad_norm": 21125.5625,
+ "learning_rate": 7.075933130609636e-05,
+ "loss": 0.4388,
+ "step": 88200
+ },
+ {
+ "epoch": 0.4553170193116329,
+ "grad_norm": 24245.548828125,
+ "learning_rate": 7.072609296910187e-05,
+ "loss": 0.4369,
+ "step": 88250
+ },
+ {
+ "epoch": 0.4555749892942457,
+ "grad_norm": 19609.1484375,
+ "learning_rate": 7.06928435689203e-05,
+ "loss": 0.4287,
+ "step": 88300
+ },
+ {
+ "epoch": 0.4558329592768585,
+ "grad_norm": 21653.08984375,
+ "learning_rate": 7.065958312329953e-05,
+ "loss": 0.4357,
+ "step": 88350
+ },
+ {
+ "epoch": 0.45609092925947137,
+ "grad_norm": 23725.236328125,
+ "learning_rate": 7.062631164999331e-05,
+ "loss": 0.4382,
+ "step": 88400
+ },
+ {
+ "epoch": 0.4563488992420842,
+ "grad_norm": 21436.92578125,
+ "learning_rate": 7.059302916676137e-05,
+ "loss": 0.4373,
+ "step": 88450
+ },
+ {
+ "epoch": 0.456606869224697,
+ "grad_norm": 20179.189453125,
+ "learning_rate": 7.05597356913692e-05,
+ "loss": 0.4304,
+ "step": 88500
+ },
+ {
+ "epoch": 0.45686483920730986,
+ "grad_norm": 22804.22265625,
+ "learning_rate": 7.052643124158824e-05,
+ "loss": 0.4343,
+ "step": 88550
+ },
+ {
+ "epoch": 0.45712280918992265,
+ "grad_norm": 21530.931640625,
+ "learning_rate": 7.049311583519574e-05,
+ "loss": 0.4364,
+ "step": 88600
+ },
+ {
+ "epoch": 0.4573807791725355,
+ "grad_norm": 21411.646484375,
+ "learning_rate": 7.045978948997486e-05,
+ "loss": 0.436,
+ "step": 88650
+ },
+ {
+ "epoch": 0.4576387491551483,
+ "grad_norm": 20853.962890625,
+ "learning_rate": 7.042645222371451e-05,
+ "loss": 0.436,
+ "step": 88700
+ },
+ {
+ "epoch": 0.45789671913776114,
+ "grad_norm": 20940.28125,
+ "learning_rate": 7.039310405420952e-05,
+ "loss": 0.4349,
+ "step": 88750
+ },
+ {
+ "epoch": 0.45815468912037394,
+ "grad_norm": 22368.05078125,
+ "learning_rate": 7.035974499926045e-05,
+ "loss": 0.4355,
+ "step": 88800
+ },
+ {
+ "epoch": 0.4584126591029868,
+ "grad_norm": 21155.3984375,
+ "learning_rate": 7.032637507667377e-05,
+ "loss": 0.4292,
+ "step": 88850
+ },
+ {
+ "epoch": 0.4586706290855996,
+ "grad_norm": 21627.353515625,
+ "learning_rate": 7.029299430426164e-05,
+ "loss": 0.4404,
+ "step": 88900
+ },
+ {
+ "epoch": 0.45892859906821243,
+ "grad_norm": 22008.23046875,
+ "learning_rate": 7.025960269984212e-05,
+ "loss": 0.431,
+ "step": 88950
+ },
+ {
+ "epoch": 0.4591865690508252,
+ "grad_norm": 21588.109375,
+ "learning_rate": 7.022620028123898e-05,
+ "loss": 0.4319,
+ "step": 89000
+ },
+ {
+ "epoch": 0.45944453903343807,
+ "grad_norm": 21680.646484375,
+ "learning_rate": 7.019278706628179e-05,
+ "loss": 0.4403,
+ "step": 89050
+ },
+ {
+ "epoch": 0.4597025090160509,
+ "grad_norm": 25427.423828125,
+ "learning_rate": 7.015936307280587e-05,
+ "loss": 0.435,
+ "step": 89100
+ },
+ {
+ "epoch": 0.4599604789986637,
+ "grad_norm": 22674.693359375,
+ "learning_rate": 7.01259283186523e-05,
+ "loss": 0.4377,
+ "step": 89150
+ },
+ {
+ "epoch": 0.46021844898127656,
+ "grad_norm": 24841.029296875,
+ "learning_rate": 7.009248282166793e-05,
+ "loss": 0.4387,
+ "step": 89200
+ },
+ {
+ "epoch": 0.46047641896388936,
+ "grad_norm": 21259.369140625,
+ "learning_rate": 7.005902659970528e-05,
+ "loss": 0.4355,
+ "step": 89250
+ },
+ {
+ "epoch": 0.4607343889465022,
+ "grad_norm": 19364.466796875,
+ "learning_rate": 7.002555967062265e-05,
+ "loss": 0.4353,
+ "step": 89300
+ },
+ {
+ "epoch": 0.460992358929115,
+ "grad_norm": 25116.47265625,
+ "learning_rate": 6.999208205228405e-05,
+ "loss": 0.4328,
+ "step": 89350
+ },
+ {
+ "epoch": 0.46125032891172785,
+ "grad_norm": 24426.4296875,
+ "learning_rate": 6.995859376255918e-05,
+ "loss": 0.4331,
+ "step": 89400
+ },
+ {
+ "epoch": 0.46150829889434064,
+ "grad_norm": 20802.759765625,
+ "learning_rate": 6.99250948193234e-05,
+ "loss": 0.4294,
+ "step": 89450
+ },
+ {
+ "epoch": 0.4617662688769535,
+ "grad_norm": 23164.2109375,
+ "learning_rate": 6.989158524045787e-05,
+ "loss": 0.4338,
+ "step": 89500
+ },
+ {
+ "epoch": 0.4620242388595663,
+ "grad_norm": 20543.28515625,
+ "learning_rate": 6.98580650438493e-05,
+ "loss": 0.4243,
+ "step": 89550
+ },
+ {
+ "epoch": 0.46228220884217913,
+ "grad_norm": 22468.732421875,
+ "learning_rate": 6.982453424739016e-05,
+ "loss": 0.4306,
+ "step": 89600
+ },
+ {
+ "epoch": 0.4625401788247919,
+ "grad_norm": 22903.12890625,
+ "learning_rate": 6.979099286897849e-05,
+ "loss": 0.4316,
+ "step": 89650
+ },
+ {
+ "epoch": 0.4627981488074048,
+ "grad_norm": 23074.068359375,
+ "learning_rate": 6.975744092651808e-05,
+ "loss": 0.4371,
+ "step": 89700
+ },
+ {
+ "epoch": 0.46305611879001757,
+ "grad_norm": 22003.00390625,
+ "learning_rate": 6.972387843791827e-05,
+ "loss": 0.4329,
+ "step": 89750
+ },
+ {
+ "epoch": 0.4633140887726304,
+ "grad_norm": 21524.93359375,
+ "learning_rate": 6.969030542109407e-05,
+ "loss": 0.4348,
+ "step": 89800
+ },
+ {
+ "epoch": 0.46357205875524327,
+ "grad_norm": 20501.130859375,
+ "learning_rate": 6.965672189396614e-05,
+ "loss": 0.4286,
+ "step": 89850
+ },
+ {
+ "epoch": 0.46383002873785606,
+ "grad_norm": 21559.396484375,
+ "learning_rate": 6.962312787446068e-05,
+ "loss": 0.434,
+ "step": 89900
+ },
+ {
+ "epoch": 0.4640879987204689,
+ "grad_norm": 21185.537109375,
+ "learning_rate": 6.958952338050955e-05,
+ "loss": 0.4326,
+ "step": 89950
+ },
+ {
+ "epoch": 0.4643459687030817,
+ "grad_norm": 23004.626953125,
+ "learning_rate": 6.955590843005016e-05,
+ "loss": 0.4272,
+ "step": 90000
+ },
+ {
+ "epoch": 0.4643459687030817,
+ "eval_loss": 0.4223860800266266,
+ "eval_runtime": 3251.8949,
+ "eval_samples_per_second": 953.635,
+ "eval_steps_per_second": 1.863,
+ "step": 90000
+ },
+ {
+ "epoch": 0.46460393868569455,
+ "grad_norm": 20333.259765625,
+ "learning_rate": 6.952228304102553e-05,
+ "loss": 0.4338,
+ "step": 90050
+ },
+ {
+ "epoch": 0.46486190866830734,
+ "grad_norm": 25967.029296875,
+ "learning_rate": 6.948864723138423e-05,
+ "loss": 0.4352,
+ "step": 90100
+ },
+ {
+ "epoch": 0.4651198786509202,
+ "grad_norm": 22849.9375,
+ "learning_rate": 6.945500101908043e-05,
+ "loss": 0.4358,
+ "step": 90150
+ },
+ {
+ "epoch": 0.465377848633533,
+ "grad_norm": 20628.9453125,
+ "learning_rate": 6.94213444220738e-05,
+ "loss": 0.4343,
+ "step": 90200
+ },
+ {
+ "epoch": 0.46563581861614584,
+ "grad_norm": 22179.84375,
+ "learning_rate": 6.938767745832959e-05,
+ "loss": 0.4314,
+ "step": 90250
+ },
+ {
+ "epoch": 0.46589378859875863,
+ "grad_norm": 24433.46484375,
+ "learning_rate": 6.935400014581858e-05,
+ "loss": 0.436,
+ "step": 90300
+ },
+ {
+ "epoch": 0.4661517585813715,
+ "grad_norm": 21914.666015625,
+ "learning_rate": 6.932031250251705e-05,
+ "loss": 0.431,
+ "step": 90350
+ },
+ {
+ "epoch": 0.46640972856398427,
+ "grad_norm": 19517.78125,
+ "learning_rate": 6.928661454640683e-05,
+ "loss": 0.4282,
+ "step": 90400
+ },
+ {
+ "epoch": 0.4666676985465971,
+ "grad_norm": 25924.5234375,
+ "learning_rate": 6.925290629547522e-05,
+ "loss": 0.4344,
+ "step": 90450
+ },
+ {
+ "epoch": 0.4669256685292099,
+ "grad_norm": 20866.927734375,
+ "learning_rate": 6.921918776771505e-05,
+ "loss": 0.4336,
+ "step": 90500
+ },
+ {
+ "epoch": 0.46718363851182276,
+ "grad_norm": 22734.5625,
+ "learning_rate": 6.91854589811246e-05,
+ "loss": 0.4375,
+ "step": 90550
+ },
+ {
+ "epoch": 0.4674416084944356,
+ "grad_norm": 21173.5703125,
+ "learning_rate": 6.915171995370766e-05,
+ "loss": 0.428,
+ "step": 90600
+ },
+ {
+ "epoch": 0.4676995784770484,
+ "grad_norm": 23864.681640625,
+ "learning_rate": 6.911797070347346e-05,
+ "loss": 0.4344,
+ "step": 90650
+ },
+ {
+ "epoch": 0.46795754845966125,
+ "grad_norm": 26236.091796875,
+ "learning_rate": 6.908421124843669e-05,
+ "loss": 0.4345,
+ "step": 90700
+ },
+ {
+ "epoch": 0.46821551844227405,
+ "grad_norm": 20788.6015625,
+ "learning_rate": 6.905044160661748e-05,
+ "loss": 0.4332,
+ "step": 90750
+ },
+ {
+ "epoch": 0.4684734884248869,
+ "grad_norm": 21382.2578125,
+ "learning_rate": 6.901666179604148e-05,
+ "loss": 0.4356,
+ "step": 90800
+ },
+ {
+ "epoch": 0.4687314584074997,
+ "grad_norm": 20230.220703125,
+ "learning_rate": 6.898287183473961e-05,
+ "loss": 0.4262,
+ "step": 90850
+ },
+ {
+ "epoch": 0.46898942839011254,
+ "grad_norm": 31838.697265625,
+ "learning_rate": 6.894907174074836e-05,
+ "loss": 0.4316,
+ "step": 90900
+ },
+ {
+ "epoch": 0.46924739837272533,
+ "grad_norm": 21029.5234375,
+ "learning_rate": 6.891526153210953e-05,
+ "loss": 0.4346,
+ "step": 90950
+ },
+ {
+ "epoch": 0.4695053683553382,
+ "grad_norm": 23617.826171875,
+ "learning_rate": 6.888144122687035e-05,
+ "loss": 0.4262,
+ "step": 91000
+ },
+ {
+ "epoch": 0.469763338337951,
+ "grad_norm": 23151.751953125,
+ "learning_rate": 6.884761084308349e-05,
+ "loss": 0.4296,
+ "step": 91050
+ },
+ {
+ "epoch": 0.4700213083205638,
+ "grad_norm": 19649.466796875,
+ "learning_rate": 6.881377039880692e-05,
+ "loss": 0.4325,
+ "step": 91100
+ },
+ {
+ "epoch": 0.4702792783031766,
+ "grad_norm": 20488.10546875,
+ "learning_rate": 6.8779919912104e-05,
+ "loss": 0.4352,
+ "step": 91150
+ },
+ {
+ "epoch": 0.47053724828578947,
+ "grad_norm": 21639.306640625,
+ "learning_rate": 6.874605940104349e-05,
+ "loss": 0.4319,
+ "step": 91200
+ },
+ {
+ "epoch": 0.4707952182684023,
+ "grad_norm": 21799.994140625,
+ "learning_rate": 6.871218888369947e-05,
+ "loss": 0.4315,
+ "step": 91250
+ },
+ {
+ "epoch": 0.4710531882510151,
+ "grad_norm": 22425.94140625,
+ "learning_rate": 6.867830837815137e-05,
+ "loss": 0.4381,
+ "step": 91300
+ },
+ {
+ "epoch": 0.47131115823362796,
+ "grad_norm": 22582.57421875,
+ "learning_rate": 6.864441790248396e-05,
+ "loss": 0.4297,
+ "step": 91350
+ },
+ {
+ "epoch": 0.47156912821624075,
+ "grad_norm": 21082.38671875,
+ "learning_rate": 6.861051747478726e-05,
+ "loss": 0.4292,
+ "step": 91400
+ },
+ {
+ "epoch": 0.4718270981988536,
+ "grad_norm": 23156.5546875,
+ "learning_rate": 6.857660711315672e-05,
+ "loss": 0.4276,
+ "step": 91450
+ },
+ {
+ "epoch": 0.4720850681814664,
+ "grad_norm": 21754.6796875,
+ "learning_rate": 6.854268683569302e-05,
+ "loss": 0.4369,
+ "step": 91500
+ },
+ {
+ "epoch": 0.47234303816407924,
+ "grad_norm": 22397.896484375,
+ "learning_rate": 6.850875666050216e-05,
+ "loss": 0.4312,
+ "step": 91550
+ },
+ {
+ "epoch": 0.47260100814669204,
+ "grad_norm": 21344.166015625,
+ "learning_rate": 6.847481660569537e-05,
+ "loss": 0.4291,
+ "step": 91600
+ },
+ {
+ "epoch": 0.4728589781293049,
+ "grad_norm": 23818.71484375,
+ "learning_rate": 6.844086668938923e-05,
+ "loss": 0.4352,
+ "step": 91650
+ },
+ {
+ "epoch": 0.4731169481119177,
+ "grad_norm": 21734.537109375,
+ "learning_rate": 6.840690692970554e-05,
+ "loss": 0.4326,
+ "step": 91700
+ },
+ {
+ "epoch": 0.47337491809453053,
+ "grad_norm": 22027.734375,
+ "learning_rate": 6.837293734477136e-05,
+ "loss": 0.4369,
+ "step": 91750
+ },
+ {
+ "epoch": 0.4736328880771433,
+ "grad_norm": 23111.103515625,
+ "learning_rate": 6.8338957952719e-05,
+ "loss": 0.4396,
+ "step": 91800
+ },
+ {
+ "epoch": 0.47389085805975617,
+ "grad_norm": 22521.767578125,
+ "learning_rate": 6.830496877168599e-05,
+ "loss": 0.4376,
+ "step": 91850
+ },
+ {
+ "epoch": 0.47414882804236896,
+ "grad_norm": 19730.158203125,
+ "learning_rate": 6.827096981981511e-05,
+ "loss": 0.4321,
+ "step": 91900
+ },
+ {
+ "epoch": 0.4744067980249818,
+ "grad_norm": 21871.134765625,
+ "learning_rate": 6.823696111525433e-05,
+ "loss": 0.4373,
+ "step": 91950
+ },
+ {
+ "epoch": 0.47466476800759466,
+ "grad_norm": 22332.384765625,
+ "learning_rate": 6.820294267615686e-05,
+ "loss": 0.4323,
+ "step": 92000
+ },
+ {
+ "epoch": 0.47492273799020746,
+ "grad_norm": 22426.59765625,
+ "learning_rate": 6.816891452068104e-05,
+ "loss": 0.4272,
+ "step": 92050
+ },
+ {
+ "epoch": 0.4751807079728203,
+ "grad_norm": 23286.05859375,
+ "learning_rate": 6.81348766669905e-05,
+ "loss": 0.4442,
+ "step": 92100
+ },
+ {
+ "epoch": 0.4754386779554331,
+ "grad_norm": 21696.1171875,
+ "learning_rate": 6.810082913325395e-05,
+ "loss": 0.4288,
+ "step": 92150
+ },
+ {
+ "epoch": 0.47569664793804595,
+ "grad_norm": 20548.908203125,
+ "learning_rate": 6.80667719376453e-05,
+ "loss": 0.4358,
+ "step": 92200
+ },
+ {
+ "epoch": 0.47595461792065874,
+ "grad_norm": 22605.1640625,
+ "learning_rate": 6.803270509834363e-05,
+ "loss": 0.4327,
+ "step": 92250
+ },
+ {
+ "epoch": 0.4762125879032716,
+ "grad_norm": 23604.30078125,
+ "learning_rate": 6.799862863353318e-05,
+ "loss": 0.441,
+ "step": 92300
+ },
+ {
+ "epoch": 0.4764705578858844,
+ "grad_norm": 22117.1796875,
+ "learning_rate": 6.796454256140328e-05,
+ "loss": 0.4289,
+ "step": 92350
+ },
+ {
+ "epoch": 0.47672852786849723,
+ "grad_norm": 22476.54296875,
+ "learning_rate": 6.793044690014842e-05,
+ "loss": 0.4319,
+ "step": 92400
+ },
+ {
+ "epoch": 0.47698649785111,
+ "grad_norm": 20855.140625,
+ "learning_rate": 6.789634166796821e-05,
+ "loss": 0.4326,
+ "step": 92450
+ },
+ {
+ "epoch": 0.4772444678337229,
+ "grad_norm": 23704.125,
+ "learning_rate": 6.786222688306734e-05,
+ "loss": 0.4374,
+ "step": 92500
+ },
+ {
+ "epoch": 0.47750243781633567,
+ "grad_norm": 20677.91015625,
+ "learning_rate": 6.782810256365568e-05,
+ "loss": 0.4261,
+ "step": 92550
+ },
+ {
+ "epoch": 0.4777604077989485,
+ "grad_norm": 21245.837890625,
+ "learning_rate": 6.779396872794807e-05,
+ "loss": 0.4309,
+ "step": 92600
+ },
+ {
+ "epoch": 0.4780183777815613,
+ "grad_norm": 25415.859375,
+ "learning_rate": 6.775982539416453e-05,
+ "loss": 0.437,
+ "step": 92650
+ },
+ {
+ "epoch": 0.47827634776417416,
+ "grad_norm": 20582.556640625,
+ "learning_rate": 6.772567258053007e-05,
+ "loss": 0.4349,
+ "step": 92700
+ },
+ {
+ "epoch": 0.478534317746787,
+ "grad_norm": 20002.013671875,
+ "learning_rate": 6.769151030527483e-05,
+ "loss": 0.4263,
+ "step": 92750
+ },
+ {
+ "epoch": 0.4787922877293998,
+ "grad_norm": 23287.6875,
+ "learning_rate": 6.765733858663397e-05,
+ "loss": 0.4332,
+ "step": 92800
+ },
+ {
+ "epoch": 0.47905025771201265,
+ "grad_norm": 22023.66796875,
+ "learning_rate": 6.76231574428477e-05,
+ "loss": 0.4339,
+ "step": 92850
+ },
+ {
+ "epoch": 0.47930822769462544,
+ "grad_norm": 21299.185546875,
+ "learning_rate": 6.758896689216122e-05,
+ "loss": 0.4293,
+ "step": 92900
+ },
+ {
+ "epoch": 0.4795661976772383,
+ "grad_norm": 21979.560546875,
+ "learning_rate": 6.755476695282479e-05,
+ "loss": 0.4314,
+ "step": 92950
+ },
+ {
+ "epoch": 0.4798241676598511,
+ "grad_norm": 21399.029296875,
+ "learning_rate": 6.752055764309372e-05,
+ "loss": 0.4374,
+ "step": 93000
+ },
+ {
+ "epoch": 0.48008213764246394,
+ "grad_norm": 23827.685546875,
+ "learning_rate": 6.748633898122823e-05,
+ "loss": 0.4348,
+ "step": 93050
+ },
+ {
+ "epoch": 0.48034010762507673,
+ "grad_norm": 21079.61328125,
+ "learning_rate": 6.74521109854936e-05,
+ "loss": 0.4312,
+ "step": 93100
+ },
+ {
+ "epoch": 0.4805980776076896,
+ "grad_norm": 20395.04296875,
+ "learning_rate": 6.741787367416006e-05,
+ "loss": 0.4246,
+ "step": 93150
+ },
+ {
+ "epoch": 0.48085604759030237,
+ "grad_norm": 21922.576171875,
+ "learning_rate": 6.738362706550284e-05,
+ "loss": 0.4355,
+ "step": 93200
+ },
+ {
+ "epoch": 0.4811140175729152,
+ "grad_norm": 21317.001953125,
+ "learning_rate": 6.734937117780211e-05,
+ "loss": 0.4302,
+ "step": 93250
+ },
+ {
+ "epoch": 0.481371987555528,
+ "grad_norm": 21387.46484375,
+ "learning_rate": 6.731510602934298e-05,
+ "loss": 0.434,
+ "step": 93300
+ },
+ {
+ "epoch": 0.48162995753814086,
+ "grad_norm": 24289.28515625,
+ "learning_rate": 6.728083163841554e-05,
+ "loss": 0.4338,
+ "step": 93350
+ },
+ {
+ "epoch": 0.4818879275207537,
+ "grad_norm": 23514.162109375,
+ "learning_rate": 6.72465480233148e-05,
+ "loss": 0.4357,
+ "step": 93400
+ },
+ {
+ "epoch": 0.4821458975033665,
+ "grad_norm": 21481.0859375,
+ "learning_rate": 6.721225520234068e-05,
+ "loss": 0.4307,
+ "step": 93450
+ },
+ {
+ "epoch": 0.48240386748597935,
+ "grad_norm": 25044.396484375,
+ "learning_rate": 6.717795319379805e-05,
+ "loss": 0.4335,
+ "step": 93500
+ },
+ {
+ "epoch": 0.48266183746859215,
+ "grad_norm": 21193.333984375,
+ "learning_rate": 6.714364201599662e-05,
+ "loss": 0.4243,
+ "step": 93550
+ },
+ {
+ "epoch": 0.482919807451205,
+ "grad_norm": 19113.275390625,
+ "learning_rate": 6.710932168725105e-05,
+ "loss": 0.4331,
+ "step": 93600
+ },
+ {
+ "epoch": 0.4831777774338178,
+ "grad_norm": 21924.162109375,
+ "learning_rate": 6.707499222588087e-05,
+ "loss": 0.4309,
+ "step": 93650
+ },
+ {
+ "epoch": 0.48343574741643064,
+ "grad_norm": 21123.498046875,
+ "learning_rate": 6.704065365021048e-05,
+ "loss": 0.4392,
+ "step": 93700
+ },
+ {
+ "epoch": 0.48369371739904343,
+ "grad_norm": 22201.29296875,
+ "learning_rate": 6.700630597856914e-05,
+ "loss": 0.4281,
+ "step": 93750
+ },
+ {
+ "epoch": 0.4839516873816563,
+ "grad_norm": 24237.494140625,
+ "learning_rate": 6.697194922929096e-05,
+ "loss": 0.4367,
+ "step": 93800
+ },
+ {
+ "epoch": 0.4842096573642691,
+ "grad_norm": 21306.8125,
+ "learning_rate": 6.693758342071495e-05,
+ "loss": 0.4374,
+ "step": 93850
+ },
+ {
+ "epoch": 0.4844676273468819,
+ "grad_norm": 22120.75,
+ "learning_rate": 6.690320857118488e-05,
+ "loss": 0.4309,
+ "step": 93900
+ },
+ {
+ "epoch": 0.4847255973294947,
+ "grad_norm": 20799.59765625,
+ "learning_rate": 6.686882469904939e-05,
+ "loss": 0.4262,
+ "step": 93950
+ },
+ {
+ "epoch": 0.48498356731210757,
+ "grad_norm": 22964.642578125,
+ "learning_rate": 6.683443182266192e-05,
+ "loss": 0.4338,
+ "step": 94000
+ },
+ {
+ "epoch": 0.48524153729472036,
+ "grad_norm": 22017.076171875,
+ "learning_rate": 6.68000299603807e-05,
+ "loss": 0.4317,
+ "step": 94050
+ },
+ {
+ "epoch": 0.4854995072773332,
+ "grad_norm": 21423.890625,
+ "learning_rate": 6.676561913056884e-05,
+ "loss": 0.4329,
+ "step": 94100
+ },
+ {
+ "epoch": 0.48575747725994606,
+ "grad_norm": 22123.390625,
+ "learning_rate": 6.67311993515941e-05,
+ "loss": 0.4309,
+ "step": 94150
+ },
+ {
+ "epoch": 0.48601544724255885,
+ "grad_norm": 23107.208984375,
+ "learning_rate": 6.669677064182915e-05,
+ "loss": 0.4316,
+ "step": 94200
+ },
+ {
+ "epoch": 0.4862734172251717,
+ "grad_norm": 21250.33203125,
+ "learning_rate": 6.666233301965132e-05,
+ "loss": 0.4289,
+ "step": 94250
+ },
+ {
+ "epoch": 0.4865313872077845,
+ "grad_norm": 21629.720703125,
+ "learning_rate": 6.66278865034428e-05,
+ "loss": 0.4301,
+ "step": 94300
+ },
+ {
+ "epoch": 0.48678935719039734,
+ "grad_norm": 23665.4609375,
+ "learning_rate": 6.659343111159043e-05,
+ "loss": 0.4267,
+ "step": 94350
+ },
+ {
+ "epoch": 0.48704732717301014,
+ "grad_norm": 23254.232421875,
+ "learning_rate": 6.655896686248583e-05,
+ "loss": 0.4266,
+ "step": 94400
+ },
+ {
+ "epoch": 0.487305297155623,
+ "grad_norm": 22491.404296875,
+ "learning_rate": 6.652449377452539e-05,
+ "loss": 0.4278,
+ "step": 94450
+ },
+ {
+ "epoch": 0.4875632671382358,
+ "grad_norm": 21071.74609375,
+ "learning_rate": 6.649001186611015e-05,
+ "loss": 0.4308,
+ "step": 94500
+ },
+ {
+ "epoch": 0.4878212371208486,
+ "grad_norm": 20860.861328125,
+ "learning_rate": 6.64555211556459e-05,
+ "loss": 0.4308,
+ "step": 94550
+ },
+ {
+ "epoch": 0.4880792071034614,
+ "grad_norm": 21733.033203125,
+ "learning_rate": 6.642102166154308e-05,
+ "loss": 0.4376,
+ "step": 94600
+ },
+ {
+ "epoch": 0.48833717708607427,
+ "grad_norm": 22799.3984375,
+ "learning_rate": 6.638651340221687e-05,
+ "loss": 0.4289,
+ "step": 94650
+ },
+ {
+ "epoch": 0.48859514706868706,
+ "grad_norm": 21678.296875,
+ "learning_rate": 6.635199639608709e-05,
+ "loss": 0.4301,
+ "step": 94700
+ },
+ {
+ "epoch": 0.4888531170512999,
+ "grad_norm": 20510.052734375,
+ "learning_rate": 6.631747066157831e-05,
+ "loss": 0.4276,
+ "step": 94750
+ },
+ {
+ "epoch": 0.48911108703391276,
+ "grad_norm": 21075.474609375,
+ "learning_rate": 6.628293621711964e-05,
+ "loss": 0.435,
+ "step": 94800
+ },
+ {
+ "epoch": 0.48936905701652555,
+ "grad_norm": 22063.083984375,
+ "learning_rate": 6.624839308114492e-05,
+ "loss": 0.434,
+ "step": 94850
+ },
+ {
+ "epoch": 0.4896270269991384,
+ "grad_norm": 20185.99609375,
+ "learning_rate": 6.621384127209261e-05,
+ "loss": 0.4246,
+ "step": 94900
+ },
+ {
+ "epoch": 0.4898849969817512,
+ "grad_norm": 22002.326171875,
+ "learning_rate": 6.61792808084058e-05,
+ "loss": 0.4272,
+ "step": 94950
+ },
+ {
+ "epoch": 0.49014296696436405,
+ "grad_norm": 22271.25,
+ "learning_rate": 6.614471170853218e-05,
+ "loss": 0.4323,
+ "step": 95000
+ },
+ {
+ "epoch": 0.49014296696436405,
+ "eval_loss": 0.4187907576560974,
+ "eval_runtime": 3274.3922,
+ "eval_samples_per_second": 947.083,
+ "eval_steps_per_second": 1.85,
+ "step": 95000
+ },
+ {
+ "epoch": 0.49040093694697684,
+ "grad_norm": 20668.224609375,
+ "learning_rate": 6.611013399092406e-05,
+ "loss": 0.4285,
+ "step": 95050
+ },
+ {
+ "epoch": 0.4906589069295897,
+ "grad_norm": 20890.05078125,
+ "learning_rate": 6.607554767403838e-05,
+ "loss": 0.4333,
+ "step": 95100
+ },
+ {
+ "epoch": 0.4909168769122025,
+ "grad_norm": 22767.6875,
+ "learning_rate": 6.604095277633664e-05,
+ "loss": 0.4284,
+ "step": 95150
+ },
+ {
+ "epoch": 0.49117484689481533,
+ "grad_norm": 22603.083984375,
+ "learning_rate": 6.600634931628493e-05,
+ "loss": 0.4332,
+ "step": 95200
+ },
+ {
+ "epoch": 0.4914328168774281,
+ "grad_norm": 25005.8984375,
+ "learning_rate": 6.597173731235388e-05,
+ "loss": 0.4284,
+ "step": 95250
+ },
+ {
+ "epoch": 0.491690786860041,
+ "grad_norm": 23687.4765625,
+ "learning_rate": 6.593711678301874e-05,
+ "loss": 0.4316,
+ "step": 95300
+ },
+ {
+ "epoch": 0.49194875684265377,
+ "grad_norm": 19670.087890625,
+ "learning_rate": 6.590248774675926e-05,
+ "loss": 0.4326,
+ "step": 95350
+ },
+ {
+ "epoch": 0.4922067268252666,
+ "grad_norm": 23065.818359375,
+ "learning_rate": 6.586785022205977e-05,
+ "loss": 0.4316,
+ "step": 95400
+ },
+ {
+ "epoch": 0.4924646968078794,
+ "grad_norm": 21279.01953125,
+ "learning_rate": 6.583320422740909e-05,
+ "loss": 0.4278,
+ "step": 95450
+ },
+ {
+ "epoch": 0.49272266679049226,
+ "grad_norm": 19707.6328125,
+ "learning_rate": 6.579854978130057e-05,
+ "loss": 0.4272,
+ "step": 95500
+ },
+ {
+ "epoch": 0.4929806367731051,
+ "grad_norm": 22938.3515625,
+ "learning_rate": 6.57638869022321e-05,
+ "loss": 0.4316,
+ "step": 95550
+ },
+ {
+ "epoch": 0.4932386067557179,
+ "grad_norm": 24812.65625,
+ "learning_rate": 6.572921560870607e-05,
+ "loss": 0.4315,
+ "step": 95600
+ },
+ {
+ "epoch": 0.49349657673833075,
+ "grad_norm": 21462.873046875,
+ "learning_rate": 6.569453591922931e-05,
+ "loss": 0.4299,
+ "step": 95650
+ },
+ {
+ "epoch": 0.49375454672094354,
+ "grad_norm": 22590.384765625,
+ "learning_rate": 6.565984785231318e-05,
+ "loss": 0.4294,
+ "step": 95700
+ },
+ {
+ "epoch": 0.4940125167035564,
+ "grad_norm": 23677.619140625,
+ "learning_rate": 6.56251514264735e-05,
+ "loss": 0.4379,
+ "step": 95750
+ },
+ {
+ "epoch": 0.4942704866861692,
+ "grad_norm": 22078.87109375,
+ "learning_rate": 6.559044666023057e-05,
+ "loss": 0.4276,
+ "step": 95800
+ },
+ {
+ "epoch": 0.49452845666878203,
+ "grad_norm": 22440.369140625,
+ "learning_rate": 6.55557335721091e-05,
+ "loss": 0.4279,
+ "step": 95850
+ },
+ {
+ "epoch": 0.49478642665139483,
+ "grad_norm": 24544.12109375,
+ "learning_rate": 6.552101218063826e-05,
+ "loss": 0.4305,
+ "step": 95900
+ },
+ {
+ "epoch": 0.4950443966340077,
+ "grad_norm": 21647.107421875,
+ "learning_rate": 6.548628250435167e-05,
+ "loss": 0.4328,
+ "step": 95950
+ },
+ {
+ "epoch": 0.49530236661662047,
+ "grad_norm": 21392.28125,
+ "learning_rate": 6.545154456178735e-05,
+ "loss": 0.4299,
+ "step": 96000
+ },
+ {
+ "epoch": 0.4955603365992333,
+ "grad_norm": 19458.55078125,
+ "learning_rate": 6.541679837148775e-05,
+ "loss": 0.4375,
+ "step": 96050
+ },
+ {
+ "epoch": 0.4958183065818461,
+ "grad_norm": 21774.14453125,
+ "learning_rate": 6.53820439519997e-05,
+ "loss": 0.4348,
+ "step": 96100
+ },
+ {
+ "epoch": 0.49607627656445896,
+ "grad_norm": 22902.63671875,
+ "learning_rate": 6.534728132187444e-05,
+ "loss": 0.4297,
+ "step": 96150
+ },
+ {
+ "epoch": 0.49633424654707176,
+ "grad_norm": 20869.306640625,
+ "learning_rate": 6.531251049966762e-05,
+ "loss": 0.4313,
+ "step": 96200
+ },
+ {
+ "epoch": 0.4965922165296846,
+ "grad_norm": 23554.537109375,
+ "learning_rate": 6.527773150393919e-05,
+ "loss": 0.4313,
+ "step": 96250
+ },
+ {
+ "epoch": 0.49685018651229745,
+ "grad_norm": 23000.92578125,
+ "learning_rate": 6.524294435325351e-05,
+ "loss": 0.4266,
+ "step": 96300
+ },
+ {
+ "epoch": 0.49710815649491025,
+ "grad_norm": 21331.72265625,
+ "learning_rate": 6.52081490661793e-05,
+ "loss": 0.4261,
+ "step": 96350
+ },
+ {
+ "epoch": 0.4973661264775231,
+ "grad_norm": 22540.75,
+ "learning_rate": 6.517334566128961e-05,
+ "loss": 0.4282,
+ "step": 96400
+ },
+ {
+ "epoch": 0.4976240964601359,
+ "grad_norm": 21733.560546875,
+ "learning_rate": 6.51385341571618e-05,
+ "loss": 0.43,
+ "step": 96450
+ },
+ {
+ "epoch": 0.49788206644274874,
+ "grad_norm": 23288.21875,
+ "learning_rate": 6.510371457237765e-05,
+ "loss": 0.4306,
+ "step": 96500
+ },
+ {
+ "epoch": 0.49814003642536153,
+ "grad_norm": 24475.9453125,
+ "learning_rate": 6.506888692552309e-05,
+ "loss": 0.4299,
+ "step": 96550
+ },
+ {
+ "epoch": 0.4983980064079744,
+ "grad_norm": 20756.5078125,
+ "learning_rate": 6.503405123518847e-05,
+ "loss": 0.4292,
+ "step": 96600
+ },
+ {
+ "epoch": 0.4986559763905872,
+ "grad_norm": 21059.365234375,
+ "learning_rate": 6.499920751996845e-05,
+ "loss": 0.4261,
+ "step": 96650
+ },
+ {
+ "epoch": 0.4989139463732,
+ "grad_norm": 22173.65625,
+ "learning_rate": 6.496435579846188e-05,
+ "loss": 0.4309,
+ "step": 96700
+ },
+ {
+ "epoch": 0.4991719163558128,
+ "grad_norm": 23941.49609375,
+ "learning_rate": 6.492949608927196e-05,
+ "loss": 0.4355,
+ "step": 96750
+ },
+ {
+ "epoch": 0.49942988633842567,
+ "grad_norm": 22027.400390625,
+ "learning_rate": 6.489462841100611e-05,
+ "loss": 0.433,
+ "step": 96800
+ },
+ {
+ "epoch": 0.49968785632103846,
+ "grad_norm": 21414.77734375,
+ "learning_rate": 6.485975278227605e-05,
+ "loss": 0.4291,
+ "step": 96850
+ },
+ {
+ "epoch": 0.4999458263036513,
+ "grad_norm": 23023.60546875,
+ "learning_rate": 6.482486922169767e-05,
+ "loss": 0.4309,
+ "step": 96900
+ },
+ {
+ "epoch": 0.5002037962862641,
+ "grad_norm": 23856.318359375,
+ "learning_rate": 6.478997774789119e-05,
+ "loss": 0.4314,
+ "step": 96950
+ },
+ {
+ "epoch": 0.500461766268877,
+ "grad_norm": 21834.822265625,
+ "learning_rate": 6.475507837948096e-05,
+ "loss": 0.4319,
+ "step": 97000
+ },
+ {
+ "epoch": 0.5007197362514898,
+ "grad_norm": 22487.779296875,
+ "learning_rate": 6.472017113509561e-05,
+ "loss": 0.4281,
+ "step": 97050
+ },
+ {
+ "epoch": 0.5009777062341026,
+ "grad_norm": 23955.73046875,
+ "learning_rate": 6.468525603336796e-05,
+ "loss": 0.4324,
+ "step": 97100
+ },
+ {
+ "epoch": 0.5012356762167154,
+ "grad_norm": 23631.203125,
+ "learning_rate": 6.4650333092935e-05,
+ "loss": 0.4333,
+ "step": 97150
+ },
+ {
+ "epoch": 0.5014936461993282,
+ "grad_norm": 21347.26953125,
+ "learning_rate": 6.461540233243792e-05,
+ "loss": 0.421,
+ "step": 97200
+ },
+ {
+ "epoch": 0.5017516161819411,
+ "grad_norm": 23590.9140625,
+ "learning_rate": 6.458046377052209e-05,
+ "loss": 0.4347,
+ "step": 97250
+ },
+ {
+ "epoch": 0.5020095861645539,
+ "grad_norm": 23192.708984375,
+ "learning_rate": 6.454551742583703e-05,
+ "loss": 0.4363,
+ "step": 97300
+ },
+ {
+ "epoch": 0.5022675561471667,
+ "grad_norm": 23588.974609375,
+ "learning_rate": 6.451056331703643e-05,
+ "loss": 0.4268,
+ "step": 97350
+ },
+ {
+ "epoch": 0.5025255261297795,
+ "grad_norm": 19536.3046875,
+ "learning_rate": 6.44756014627781e-05,
+ "loss": 0.4268,
+ "step": 97400
+ },
+ {
+ "epoch": 0.5027834961123924,
+ "grad_norm": 20248.345703125,
+ "learning_rate": 6.444063188172401e-05,
+ "loss": 0.4286,
+ "step": 97450
+ },
+ {
+ "epoch": 0.5030414660950052,
+ "grad_norm": 21598.1171875,
+ "learning_rate": 6.440565459254027e-05,
+ "loss": 0.4302,
+ "step": 97500
+ },
+ {
+ "epoch": 0.503299436077618,
+ "grad_norm": 25492.541015625,
+ "learning_rate": 6.437066961389704e-05,
+ "loss": 0.4223,
+ "step": 97550
+ },
+ {
+ "epoch": 0.5035574060602308,
+ "grad_norm": 22227.8125,
+ "learning_rate": 6.433567696446865e-05,
+ "loss": 0.4194,
+ "step": 97600
+ },
+ {
+ "epoch": 0.5038153760428437,
+ "grad_norm": 23799.134765625,
+ "learning_rate": 6.430067666293348e-05,
+ "loss": 0.4239,
+ "step": 97650
+ },
+ {
+ "epoch": 0.5040733460254565,
+ "grad_norm": 25147.080078125,
+ "learning_rate": 6.426566872797403e-05,
+ "loss": 0.4369,
+ "step": 97700
+ },
+ {
+ "epoch": 0.5043313160080694,
+ "grad_norm": 22497.68359375,
+ "learning_rate": 6.423065317827686e-05,
+ "loss": 0.4332,
+ "step": 97750
+ },
+ {
+ "epoch": 0.5045892859906821,
+ "grad_norm": 23273.966796875,
+ "learning_rate": 6.419563003253258e-05,
+ "loss": 0.4331,
+ "step": 97800
+ },
+ {
+ "epoch": 0.5048472559732949,
+ "grad_norm": 21943.7734375,
+ "learning_rate": 6.416059930943585e-05,
+ "loss": 0.4331,
+ "step": 97850
+ },
+ {
+ "epoch": 0.5051052259559078,
+ "grad_norm": 23134.685546875,
+ "learning_rate": 6.412556102768544e-05,
+ "loss": 0.4283,
+ "step": 97900
+ },
+ {
+ "epoch": 0.5053631959385206,
+ "grad_norm": 21504.177734375,
+ "learning_rate": 6.409051520598405e-05,
+ "loss": 0.4319,
+ "step": 97950
+ },
+ {
+ "epoch": 0.5056211659211334,
+ "grad_norm": 25481.029296875,
+ "learning_rate": 6.405546186303852e-05,
+ "loss": 0.4268,
+ "step": 98000
+ },
+ {
+ "epoch": 0.5058791359037462,
+ "grad_norm": 21170.70703125,
+ "learning_rate": 6.402040101755961e-05,
+ "loss": 0.4253,
+ "step": 98050
+ },
+ {
+ "epoch": 0.5061371058863591,
+ "grad_norm": 20005.333984375,
+ "learning_rate": 6.398533268826212e-05,
+ "loss": 0.4267,
+ "step": 98100
+ },
+ {
+ "epoch": 0.5063950758689719,
+ "grad_norm": 20913.32421875,
+ "learning_rate": 6.395025689386485e-05,
+ "loss": 0.4245,
+ "step": 98150
+ },
+ {
+ "epoch": 0.5066530458515847,
+ "grad_norm": 24310.720703125,
+ "learning_rate": 6.391517365309059e-05,
+ "loss": 0.4246,
+ "step": 98200
+ },
+ {
+ "epoch": 0.5069110158341975,
+ "grad_norm": 21981.455078125,
+ "learning_rate": 6.388008298466607e-05,
+ "loss": 0.4286,
+ "step": 98250
+ },
+ {
+ "epoch": 0.5071689858168104,
+ "grad_norm": 23764.30078125,
+ "learning_rate": 6.384498490732202e-05,
+ "loss": 0.4282,
+ "step": 98300
+ },
+ {
+ "epoch": 0.5074269557994232,
+ "grad_norm": 20518.447265625,
+ "learning_rate": 6.380987943979314e-05,
+ "loss": 0.4333,
+ "step": 98350
+ },
+ {
+ "epoch": 0.5076849257820361,
+ "grad_norm": 23327.80859375,
+ "learning_rate": 6.377476660081803e-05,
+ "loss": 0.4255,
+ "step": 98400
+ },
+ {
+ "epoch": 0.5079428957646488,
+ "grad_norm": 19600.84375,
+ "learning_rate": 6.373964640913924e-05,
+ "loss": 0.4277,
+ "step": 98450
+ },
+ {
+ "epoch": 0.5082008657472616,
+ "grad_norm": 23252.146484375,
+ "learning_rate": 6.370451888350322e-05,
+ "loss": 0.4311,
+ "step": 98500
+ },
+ {
+ "epoch": 0.5084588357298745,
+ "grad_norm": 21930.736328125,
+ "learning_rate": 6.366938404266041e-05,
+ "loss": 0.4329,
+ "step": 98550
+ },
+ {
+ "epoch": 0.5087168057124873,
+ "grad_norm": 21249.69140625,
+ "learning_rate": 6.36342419053651e-05,
+ "loss": 0.4257,
+ "step": 98600
+ },
+ {
+ "epoch": 0.5089747756951001,
+ "grad_norm": 21809.4609375,
+ "learning_rate": 6.359909249037548e-05,
+ "loss": 0.431,
+ "step": 98650
+ },
+ {
+ "epoch": 0.5092327456777129,
+ "grad_norm": 23142.6796875,
+ "learning_rate": 6.356393581645359e-05,
+ "loss": 0.4329,
+ "step": 98700
+ },
+ {
+ "epoch": 0.5094907156603258,
+ "grad_norm": 21783.541015625,
+ "learning_rate": 6.352877190236542e-05,
+ "loss": 0.4362,
+ "step": 98750
+ },
+ {
+ "epoch": 0.5097486856429386,
+ "grad_norm": 22534.080078125,
+ "learning_rate": 6.349360076688079e-05,
+ "loss": 0.4302,
+ "step": 98800
+ },
+ {
+ "epoch": 0.5100066556255514,
+ "grad_norm": 22630.03515625,
+ "learning_rate": 6.345842242877336e-05,
+ "loss": 0.4314,
+ "step": 98850
+ },
+ {
+ "epoch": 0.5102646256081642,
+ "grad_norm": 23446.0390625,
+ "learning_rate": 6.342323690682064e-05,
+ "loss": 0.428,
+ "step": 98900
+ },
+ {
+ "epoch": 0.5105225955907771,
+ "grad_norm": 25644.2734375,
+ "learning_rate": 6.338804421980398e-05,
+ "loss": 0.4219,
+ "step": 98950
+ },
+ {
+ "epoch": 0.5107805655733899,
+ "grad_norm": 23159.580078125,
+ "learning_rate": 6.335284438650856e-05,
+ "loss": 0.434,
+ "step": 99000
+ },
+ {
+ "epoch": 0.5110385355560026,
+ "grad_norm": 23536.556640625,
+ "learning_rate": 6.331763742572337e-05,
+ "loss": 0.4293,
+ "step": 99050
+ },
+ {
+ "epoch": 0.5112965055386155,
+ "grad_norm": 23240.662109375,
+ "learning_rate": 6.328242335624121e-05,
+ "loss": 0.434,
+ "step": 99100
+ },
+ {
+ "epoch": 0.5115544755212283,
+ "grad_norm": 22368.94921875,
+ "learning_rate": 6.324720219685866e-05,
+ "loss": 0.4295,
+ "step": 99150
+ },
+ {
+ "epoch": 0.5118124455038412,
+ "grad_norm": 23257.068359375,
+ "learning_rate": 6.321197396637608e-05,
+ "loss": 0.4198,
+ "step": 99200
+ },
+ {
+ "epoch": 0.512070415486454,
+ "grad_norm": 21806.6953125,
+ "learning_rate": 6.317673868359765e-05,
+ "loss": 0.4241,
+ "step": 99250
+ },
+ {
+ "epoch": 0.5123283854690668,
+ "grad_norm": 24117.416015625,
+ "learning_rate": 6.314149636733125e-05,
+ "loss": 0.4261,
+ "step": 99300
+ },
+ {
+ "epoch": 0.5125863554516796,
+ "grad_norm": 25262.626953125,
+ "learning_rate": 6.310624703638858e-05,
+ "loss": 0.4234,
+ "step": 99350
+ },
+ {
+ "epoch": 0.5128443254342925,
+ "grad_norm": 22739.923828125,
+ "learning_rate": 6.3070990709585e-05,
+ "loss": 0.4299,
+ "step": 99400
+ },
+ {
+ "epoch": 0.5131022954169053,
+ "grad_norm": 20651.646484375,
+ "learning_rate": 6.303572740573971e-05,
+ "loss": 0.4307,
+ "step": 99450
+ },
+ {
+ "epoch": 0.5133602653995181,
+ "grad_norm": 22125.037109375,
+ "learning_rate": 6.300045714367555e-05,
+ "loss": 0.4216,
+ "step": 99500
+ },
+ {
+ "epoch": 0.5136182353821309,
+ "grad_norm": 22210.080078125,
+ "learning_rate": 6.29651799422191e-05,
+ "loss": 0.429,
+ "step": 99550
+ },
+ {
+ "epoch": 0.5138762053647438,
+ "grad_norm": 23850.673828125,
+ "learning_rate": 6.292989582020063e-05,
+ "loss": 0.4337,
+ "step": 99600
+ },
+ {
+ "epoch": 0.5141341753473566,
+ "grad_norm": 21346.251953125,
+ "learning_rate": 6.289460479645417e-05,
+ "loss": 0.4352,
+ "step": 99650
+ },
+ {
+ "epoch": 0.5143921453299694,
+ "grad_norm": 22687.080078125,
+ "learning_rate": 6.285930688981735e-05,
+ "loss": 0.433,
+ "step": 99700
+ },
+ {
+ "epoch": 0.5146501153125822,
+ "grad_norm": 20447.666015625,
+ "learning_rate": 6.282400211913154e-05,
+ "loss": 0.4288,
+ "step": 99750
+ },
+ {
+ "epoch": 0.514908085295195,
+ "grad_norm": 21768.51953125,
+ "learning_rate": 6.278869050324168e-05,
+ "loss": 0.4363,
+ "step": 99800
+ },
+ {
+ "epoch": 0.5151660552778079,
+ "grad_norm": 21896.47265625,
+ "learning_rate": 6.27533720609965e-05,
+ "loss": 0.4307,
+ "step": 99850
+ },
+ {
+ "epoch": 0.5154240252604207,
+ "grad_norm": 22967.384765625,
+ "learning_rate": 6.271804681124827e-05,
+ "loss": 0.4295,
+ "step": 99900
+ },
+ {
+ "epoch": 0.5156819952430335,
+ "grad_norm": 20233.869140625,
+ "learning_rate": 6.268271477285292e-05,
+ "loss": 0.4329,
+ "step": 99950
+ },
+ {
+ "epoch": 0.5159399652256463,
+ "grad_norm": 20550.060546875,
+ "learning_rate": 6.264737596466998e-05,
+ "loss": 0.4267,
+ "step": 100000
+ },
+ {
+ "epoch": 0.5159399652256463,
+ "eval_loss": 0.4161209166049957,
+ "eval_runtime": 2887.0736,
+ "eval_samples_per_second": 1074.14,
+ "eval_steps_per_second": 2.098,
+ "step": 100000
+ },
+ {
+ "epoch": 0.5161979352082592,
+ "grad_norm": 22327.767578125,
+ "learning_rate": 6.261203040556267e-05,
+ "loss": 0.4272,
+ "step": 100050
+ },
+ {
+ "epoch": 0.516455905190872,
+ "grad_norm": 22512.1640625,
+ "learning_rate": 6.257667811439776e-05,
+ "loss": 0.4267,
+ "step": 100100
+ },
+ {
+ "epoch": 0.5167138751734848,
+ "grad_norm": 22710.8828125,
+ "learning_rate": 6.254131911004561e-05,
+ "loss": 0.42,
+ "step": 100150
+ },
+ {
+ "epoch": 0.5169718451560976,
+ "grad_norm": 21731.365234375,
+ "learning_rate": 6.250595341138014e-05,
+ "loss": 0.4259,
+ "step": 100200
+ },
+ {
+ "epoch": 0.5172298151387105,
+ "grad_norm": 21478.970703125,
+ "learning_rate": 6.247058103727892e-05,
+ "loss": 0.4217,
+ "step": 100250
+ },
+ {
+ "epoch": 0.5174877851213233,
+ "grad_norm": 22431.939453125,
+ "learning_rate": 6.243520200662303e-05,
+ "loss": 0.4272,
+ "step": 100300
+ },
+ {
+ "epoch": 0.5177457551039361,
+ "grad_norm": 22137.5078125,
+ "learning_rate": 6.239981633829709e-05,
+ "loss": 0.4301,
+ "step": 100350
+ },
+ {
+ "epoch": 0.5180037250865489,
+ "grad_norm": 22802.220703125,
+ "learning_rate": 6.23644240511893e-05,
+ "loss": 0.4346,
+ "step": 100400
+ },
+ {
+ "epoch": 0.5182616950691618,
+ "grad_norm": 20567.640625,
+ "learning_rate": 6.232902516419137e-05,
+ "loss": 0.4271,
+ "step": 100450
+ },
+ {
+ "epoch": 0.5185196650517746,
+ "grad_norm": 20855.70703125,
+ "learning_rate": 6.229361969619855e-05,
+ "loss": 0.4237,
+ "step": 100500
+ },
+ {
+ "epoch": 0.5187776350343875,
+ "grad_norm": 22052.44921875,
+ "learning_rate": 6.225820766610958e-05,
+ "loss": 0.4324,
+ "step": 100550
+ },
+ {
+ "epoch": 0.5190356050170002,
+ "grad_norm": 21984.818359375,
+ "learning_rate": 6.222278909282674e-05,
+ "loss": 0.4315,
+ "step": 100600
+ },
+ {
+ "epoch": 0.519293574999613,
+ "grad_norm": 22044.8359375,
+ "learning_rate": 6.218736399525575e-05,
+ "loss": 0.4324,
+ "step": 100650
+ },
+ {
+ "epoch": 0.5195515449822259,
+ "grad_norm": 22661.78515625,
+ "learning_rate": 6.215193239230586e-05,
+ "loss": 0.4273,
+ "step": 100700
+ },
+ {
+ "epoch": 0.5198095149648387,
+ "grad_norm": 22091.01171875,
+ "learning_rate": 6.211649430288976e-05,
+ "loss": 0.4252,
+ "step": 100750
+ },
+ {
+ "epoch": 0.5200674849474515,
+ "grad_norm": 22164.376953125,
+ "learning_rate": 6.208104974592364e-05,
+ "loss": 0.4272,
+ "step": 100800
+ },
+ {
+ "epoch": 0.5203254549300643,
+ "grad_norm": 23387.287109375,
+ "learning_rate": 6.20455987403271e-05,
+ "loss": 0.4281,
+ "step": 100850
+ },
+ {
+ "epoch": 0.5205834249126772,
+ "grad_norm": 22505.326171875,
+ "learning_rate": 6.201014130502317e-05,
+ "loss": 0.4285,
+ "step": 100900
+ },
+ {
+ "epoch": 0.52084139489529,
+ "grad_norm": 21150.341796875,
+ "learning_rate": 6.19746774589384e-05,
+ "loss": 0.4274,
+ "step": 100950
+ },
+ {
+ "epoch": 0.5210993648779028,
+ "grad_norm": 23076.650390625,
+ "learning_rate": 6.193920722100268e-05,
+ "loss": 0.4289,
+ "step": 101000
+ },
+ {
+ "epoch": 0.5213573348605156,
+ "grad_norm": 20890.41796875,
+ "learning_rate": 6.190373061014932e-05,
+ "loss": 0.4305,
+ "step": 101050
+ },
+ {
+ "epoch": 0.5216153048431285,
+ "grad_norm": 22231.6328125,
+ "learning_rate": 6.186824764531507e-05,
+ "loss": 0.4304,
+ "step": 101100
+ },
+ {
+ "epoch": 0.5218732748257413,
+ "grad_norm": 22094.197265625,
+ "learning_rate": 6.183275834544005e-05,
+ "loss": 0.4279,
+ "step": 101150
+ },
+ {
+ "epoch": 0.522131244808354,
+ "grad_norm": 23188.353515625,
+ "learning_rate": 6.179726272946774e-05,
+ "loss": 0.4272,
+ "step": 101200
+ },
+ {
+ "epoch": 0.5223892147909669,
+ "grad_norm": 22908.5,
+ "learning_rate": 6.176176081634504e-05,
+ "loss": 0.4229,
+ "step": 101250
+ },
+ {
+ "epoch": 0.5226471847735797,
+ "grad_norm": 21536.37109375,
+ "learning_rate": 6.172625262502215e-05,
+ "loss": 0.4267,
+ "step": 101300
+ },
+ {
+ "epoch": 0.5229051547561926,
+ "grad_norm": 22923.38671875,
+ "learning_rate": 6.169073817445268e-05,
+ "loss": 0.4256,
+ "step": 101350
+ },
+ {
+ "epoch": 0.5231631247388054,
+ "grad_norm": 22802.669921875,
+ "learning_rate": 6.165521748359356e-05,
+ "loss": 0.4241,
+ "step": 101400
+ },
+ {
+ "epoch": 0.5234210947214182,
+ "grad_norm": 22852.59765625,
+ "learning_rate": 6.161969057140504e-05,
+ "loss": 0.4275,
+ "step": 101450
+ },
+ {
+ "epoch": 0.523679064704031,
+ "grad_norm": 27410.056640625,
+ "learning_rate": 6.158415745685068e-05,
+ "loss": 0.4316,
+ "step": 101500
+ },
+ {
+ "epoch": 0.5239370346866439,
+ "grad_norm": 21783.482421875,
+ "learning_rate": 6.15486181588974e-05,
+ "loss": 0.4235,
+ "step": 101550
+ },
+ {
+ "epoch": 0.5241950046692567,
+ "grad_norm": 21013.259765625,
+ "learning_rate": 6.151307269651536e-05,
+ "loss": 0.426,
+ "step": 101600
+ },
+ {
+ "epoch": 0.5244529746518695,
+ "grad_norm": 23852.673828125,
+ "learning_rate": 6.147752108867807e-05,
+ "loss": 0.4226,
+ "step": 101650
+ },
+ {
+ "epoch": 0.5247109446344823,
+ "grad_norm": 24846.427734375,
+ "learning_rate": 6.144196335436225e-05,
+ "loss": 0.4277,
+ "step": 101700
+ },
+ {
+ "epoch": 0.5249689146170952,
+ "grad_norm": 21197.177734375,
+ "learning_rate": 6.140639951254796e-05,
+ "loss": 0.4247,
+ "step": 101750
+ },
+ {
+ "epoch": 0.525226884599708,
+ "grad_norm": 24620.37890625,
+ "learning_rate": 6.137082958221848e-05,
+ "loss": 0.429,
+ "step": 101800
+ },
+ {
+ "epoch": 0.5254848545823207,
+ "grad_norm": 22811.875,
+ "learning_rate": 6.133525358236036e-05,
+ "loss": 0.4274,
+ "step": 101850
+ },
+ {
+ "epoch": 0.5257428245649336,
+ "grad_norm": 20224.125,
+ "learning_rate": 6.129967153196336e-05,
+ "loss": 0.4338,
+ "step": 101900
+ },
+ {
+ "epoch": 0.5260007945475464,
+ "grad_norm": 21489.734375,
+ "learning_rate": 6.126408345002052e-05,
+ "loss": 0.4333,
+ "step": 101950
+ },
+ {
+ "epoch": 0.5262587645301593,
+ "grad_norm": 21771.20703125,
+ "learning_rate": 6.122848935552804e-05,
+ "loss": 0.4258,
+ "step": 102000
+ },
+ {
+ "epoch": 0.5265167345127721,
+ "grad_norm": 23362.43359375,
+ "learning_rate": 6.119288926748537e-05,
+ "loss": 0.4234,
+ "step": 102050
+ },
+ {
+ "epoch": 0.5267747044953849,
+ "grad_norm": 20869.46484375,
+ "learning_rate": 6.115728320489516e-05,
+ "loss": 0.4233,
+ "step": 102100
+ },
+ {
+ "epoch": 0.5270326744779977,
+ "grad_norm": 21146.568359375,
+ "learning_rate": 6.11216711867632e-05,
+ "loss": 0.4243,
+ "step": 102150
+ },
+ {
+ "epoch": 0.5272906444606106,
+ "grad_norm": 24031.97265625,
+ "learning_rate": 6.108605323209853e-05,
+ "loss": 0.4334,
+ "step": 102200
+ },
+ {
+ "epoch": 0.5275486144432234,
+ "grad_norm": 23461.306640625,
+ "learning_rate": 6.10504293599133e-05,
+ "loss": 0.4289,
+ "step": 102250
+ },
+ {
+ "epoch": 0.5278065844258362,
+ "grad_norm": 21013.169921875,
+ "learning_rate": 6.101479958922287e-05,
+ "loss": 0.4334,
+ "step": 102300
+ },
+ {
+ "epoch": 0.528064554408449,
+ "grad_norm": 23328.306640625,
+ "learning_rate": 6.0979163939045716e-05,
+ "loss": 0.4285,
+ "step": 102350
+ },
+ {
+ "epoch": 0.5283225243910619,
+ "grad_norm": 21542.20703125,
+ "learning_rate": 6.094352242840343e-05,
+ "loss": 0.4321,
+ "step": 102400
+ },
+ {
+ "epoch": 0.5285804943736747,
+ "grad_norm": 20556.357421875,
+ "learning_rate": 6.09078750763208e-05,
+ "loss": 0.4255,
+ "step": 102450
+ },
+ {
+ "epoch": 0.5288384643562875,
+ "grad_norm": 24925.21875,
+ "learning_rate": 6.0872221901825666e-05,
+ "loss": 0.4225,
+ "step": 102500
+ },
+ {
+ "epoch": 0.5290964343389003,
+ "grad_norm": 22750.419921875,
+ "learning_rate": 6.0836562923949016e-05,
+ "loss": 0.4287,
+ "step": 102550
+ },
+ {
+ "epoch": 0.5293544043215132,
+ "grad_norm": 21514.8984375,
+ "learning_rate": 6.080089816172489e-05,
+ "loss": 0.4254,
+ "step": 102600
+ },
+ {
+ "epoch": 0.529612374304126,
+ "grad_norm": 23347.03125,
+ "learning_rate": 6.07652276341905e-05,
+ "loss": 0.4346,
+ "step": 102650
+ },
+ {
+ "epoch": 0.5298703442867388,
+ "grad_norm": 23180.916015625,
+ "learning_rate": 6.072955136038604e-05,
+ "loss": 0.4244,
+ "step": 102700
+ },
+ {
+ "epoch": 0.5301283142693516,
+ "grad_norm": 20701.431640625,
+ "learning_rate": 6.069386935935484e-05,
+ "loss": 0.43,
+ "step": 102750
+ },
+ {
+ "epoch": 0.5303862842519644,
+ "grad_norm": 23350.99609375,
+ "learning_rate": 6.0658181650143245e-05,
+ "loss": 0.4217,
+ "step": 102800
+ },
+ {
+ "epoch": 0.5306442542345773,
+ "grad_norm": 21068.111328125,
+ "learning_rate": 6.062248825180066e-05,
+ "loss": 0.4278,
+ "step": 102850
+ },
+ {
+ "epoch": 0.5309022242171901,
+ "grad_norm": 23415.25,
+ "learning_rate": 6.0586789183379554e-05,
+ "loss": 0.4331,
+ "step": 102900
+ },
+ {
+ "epoch": 0.5311601941998029,
+ "grad_norm": 22186.048828125,
+ "learning_rate": 6.055108446393538e-05,
+ "loss": 0.4327,
+ "step": 102950
+ },
+ {
+ "epoch": 0.5314181641824157,
+ "grad_norm": 20644.166015625,
+ "learning_rate": 6.051537411252662e-05,
+ "loss": 0.4264,
+ "step": 103000
+ },
+ {
+ "epoch": 0.5316761341650286,
+ "grad_norm": 21755.712890625,
+ "learning_rate": 6.047965814821478e-05,
+ "loss": 0.4253,
+ "step": 103050
+ },
+ {
+ "epoch": 0.5319341041476414,
+ "grad_norm": 22319.177734375,
+ "learning_rate": 6.044393659006435e-05,
+ "loss": 0.4238,
+ "step": 103100
+ },
+ {
+ "epoch": 0.5321920741302542,
+ "grad_norm": 22544.064453125,
+ "learning_rate": 6.040820945714281e-05,
+ "loss": 0.4306,
+ "step": 103150
+ },
+ {
+ "epoch": 0.532450044112867,
+ "grad_norm": 21484.53125,
+ "learning_rate": 6.037247676852059e-05,
+ "loss": 0.4254,
+ "step": 103200
+ },
+ {
+ "epoch": 0.5327080140954799,
+ "grad_norm": 23923.201171875,
+ "learning_rate": 6.033673854327114e-05,
+ "loss": 0.4258,
+ "step": 103250
+ },
+ {
+ "epoch": 0.5329659840780927,
+ "grad_norm": 20412.08984375,
+ "learning_rate": 6.03009948004708e-05,
+ "loss": 0.4286,
+ "step": 103300
+ },
+ {
+ "epoch": 0.5332239540607056,
+ "grad_norm": 19932.908203125,
+ "learning_rate": 6.026524555919891e-05,
+ "loss": 0.4367,
+ "step": 103350
+ },
+ {
+ "epoch": 0.5334819240433183,
+ "grad_norm": 21761.033203125,
+ "learning_rate": 6.022949083853772e-05,
+ "loss": 0.4272,
+ "step": 103400
+ },
+ {
+ "epoch": 0.5337398940259311,
+ "grad_norm": 23392.29296875,
+ "learning_rate": 6.019373065757239e-05,
+ "loss": 0.4274,
+ "step": 103450
+ },
+ {
+ "epoch": 0.533997864008544,
+ "grad_norm": 26151.69921875,
+ "learning_rate": 6.015796503539103e-05,
+ "loss": 0.4189,
+ "step": 103500
+ },
+ {
+ "epoch": 0.5342558339911568,
+ "grad_norm": 22503.529296875,
+ "learning_rate": 6.012219399108463e-05,
+ "loss": 0.428,
+ "step": 103550
+ },
+ {
+ "epoch": 0.5345138039737696,
+ "grad_norm": 25906.685546875,
+ "learning_rate": 6.008641754374709e-05,
+ "loss": 0.4287,
+ "step": 103600
+ },
+ {
+ "epoch": 0.5347717739563824,
+ "grad_norm": 23784.685546875,
+ "learning_rate": 6.005063571247517e-05,
+ "loss": 0.4276,
+ "step": 103650
+ },
+ {
+ "epoch": 0.5350297439389953,
+ "grad_norm": 21574.30078125,
+ "learning_rate": 6.0014848516368515e-05,
+ "loss": 0.4344,
+ "step": 103700
+ },
+ {
+ "epoch": 0.5352877139216081,
+ "grad_norm": 22296.921875,
+ "learning_rate": 5.9979055974529675e-05,
+ "loss": 0.4322,
+ "step": 103750
+ },
+ {
+ "epoch": 0.5355456839042209,
+ "grad_norm": 21478.611328125,
+ "learning_rate": 5.994325810606397e-05,
+ "loss": 0.429,
+ "step": 103800
+ },
+ {
+ "epoch": 0.5358036538868337,
+ "grad_norm": 22572.37109375,
+ "learning_rate": 5.9907454930079645e-05,
+ "loss": 0.4281,
+ "step": 103850
+ },
+ {
+ "epoch": 0.5360616238694466,
+ "grad_norm": 23416.80859375,
+ "learning_rate": 5.98716464656877e-05,
+ "loss": 0.4266,
+ "step": 103900
+ },
+ {
+ "epoch": 0.5363195938520594,
+ "grad_norm": 23470.626953125,
+ "learning_rate": 5.983583273200204e-05,
+ "loss": 0.426,
+ "step": 103950
+ },
+ {
+ "epoch": 0.5365775638346721,
+ "grad_norm": 24464.38671875,
+ "learning_rate": 5.980001374813933e-05,
+ "loss": 0.4218,
+ "step": 104000
+ },
+ {
+ "epoch": 0.536835533817285,
+ "grad_norm": 23835.29296875,
+ "learning_rate": 5.976418953321904e-05,
+ "loss": 0.4261,
+ "step": 104050
+ },
+ {
+ "epoch": 0.5370935037998978,
+ "grad_norm": 23344.654296875,
+ "learning_rate": 5.972836010636346e-05,
+ "loss": 0.4292,
+ "step": 104100
+ },
+ {
+ "epoch": 0.5373514737825107,
+ "grad_norm": 23925.935546875,
+ "learning_rate": 5.9692525486697616e-05,
+ "loss": 0.4323,
+ "step": 104150
+ },
+ {
+ "epoch": 0.5376094437651235,
+ "grad_norm": 23155.76953125,
+ "learning_rate": 5.965668569334937e-05,
+ "loss": 0.428,
+ "step": 104200
+ },
+ {
+ "epoch": 0.5378674137477363,
+ "grad_norm": 22334.19921875,
+ "learning_rate": 5.962084074544928e-05,
+ "loss": 0.4129,
+ "step": 104250
+ },
+ {
+ "epoch": 0.5381253837303491,
+ "grad_norm": 20239.66796875,
+ "learning_rate": 5.95849906621307e-05,
+ "loss": 0.4335,
+ "step": 104300
+ },
+ {
+ "epoch": 0.538383353712962,
+ "grad_norm": 22626.19140625,
+ "learning_rate": 5.9549135462529704e-05,
+ "loss": 0.4274,
+ "step": 104350
+ },
+ {
+ "epoch": 0.5386413236955748,
+ "grad_norm": 21798.65625,
+ "learning_rate": 5.951327516578512e-05,
+ "loss": 0.4258,
+ "step": 104400
+ },
+ {
+ "epoch": 0.5388992936781876,
+ "grad_norm": 21796.7421875,
+ "learning_rate": 5.947740979103845e-05,
+ "loss": 0.4263,
+ "step": 104450
+ },
+ {
+ "epoch": 0.5391572636608004,
+ "grad_norm": 22380.21484375,
+ "learning_rate": 5.944153935743396e-05,
+ "loss": 0.4218,
+ "step": 104500
+ },
+ {
+ "epoch": 0.5394152336434133,
+ "grad_norm": 22526.4296875,
+ "learning_rate": 5.940566388411859e-05,
+ "loss": 0.4233,
+ "step": 104550
+ },
+ {
+ "epoch": 0.5396732036260261,
+ "grad_norm": 22876.5703125,
+ "learning_rate": 5.936978339024195e-05,
+ "loss": 0.4296,
+ "step": 104600
+ },
+ {
+ "epoch": 0.5399311736086388,
+ "grad_norm": 22592.654296875,
+ "learning_rate": 5.9333897894956394e-05,
+ "loss": 0.4287,
+ "step": 104650
+ },
+ {
+ "epoch": 0.5401891435912517,
+ "grad_norm": 21235.43359375,
+ "learning_rate": 5.929800741741688e-05,
+ "loss": 0.4269,
+ "step": 104700
+ },
+ {
+ "epoch": 0.5404471135738645,
+ "grad_norm": 22049.05859375,
+ "learning_rate": 5.926211197678104e-05,
+ "loss": 0.4266,
+ "step": 104750
+ },
+ {
+ "epoch": 0.5407050835564774,
+ "grad_norm": 23252.845703125,
+ "learning_rate": 5.922621159220918e-05,
+ "loss": 0.4223,
+ "step": 104800
+ },
+ {
+ "epoch": 0.5409630535390902,
+ "grad_norm": 20577.1796875,
+ "learning_rate": 5.919030628286424e-05,
+ "loss": 0.4302,
+ "step": 104850
+ },
+ {
+ "epoch": 0.541221023521703,
+ "grad_norm": 24854.8671875,
+ "learning_rate": 5.915439606791174e-05,
+ "loss": 0.4212,
+ "step": 104900
+ },
+ {
+ "epoch": 0.5414789935043158,
+ "grad_norm": 22561.552734375,
+ "learning_rate": 5.9118480966519906e-05,
+ "loss": 0.4196,
+ "step": 104950
+ },
+ {
+ "epoch": 0.5417369634869287,
+ "grad_norm": 23885.4765625,
+ "learning_rate": 5.9082560997859496e-05,
+ "loss": 0.421,
+ "step": 105000
+ },
+ {
+ "epoch": 0.5417369634869287,
+ "eval_loss": 0.4132173955440521,
+ "eval_runtime": 2876.3365,
+ "eval_samples_per_second": 1078.149,
+ "eval_steps_per_second": 2.106,
+ "step": 105000
+ },
+ {
+ "epoch": 0.5419949334695415,
+ "grad_norm": 20974.994140625,
+ "learning_rate": 5.90466361811039e-05,
+ "loss": 0.4228,
+ "step": 105050
+ },
+ {
+ "epoch": 0.5422529034521543,
+ "grad_norm": 24338.412109375,
+ "learning_rate": 5.9010706535429086e-05,
+ "loss": 0.4215,
+ "step": 105100
+ },
+ {
+ "epoch": 0.5425108734347671,
+ "grad_norm": 20734.796875,
+ "learning_rate": 5.8974772080013605e-05,
+ "loss": 0.4319,
+ "step": 105150
+ },
+ {
+ "epoch": 0.54276884341738,
+ "grad_norm": 21026.123046875,
+ "learning_rate": 5.8938832834038574e-05,
+ "loss": 0.4318,
+ "step": 105200
+ },
+ {
+ "epoch": 0.5430268133999928,
+ "grad_norm": 20023.287109375,
+ "learning_rate": 5.890288881668766e-05,
+ "loss": 0.4306,
+ "step": 105250
+ },
+ {
+ "epoch": 0.5432847833826056,
+ "grad_norm": 23171.42578125,
+ "learning_rate": 5.88669400471471e-05,
+ "loss": 0.4237,
+ "step": 105300
+ },
+ {
+ "epoch": 0.5435427533652184,
+ "grad_norm": 21692.109375,
+ "learning_rate": 5.8830986544605635e-05,
+ "loss": 0.4261,
+ "step": 105350
+ },
+ {
+ "epoch": 0.5438007233478312,
+ "grad_norm": 22358.216796875,
+ "learning_rate": 5.8795028328254566e-05,
+ "loss": 0.4204,
+ "step": 105400
+ },
+ {
+ "epoch": 0.5440586933304441,
+ "grad_norm": 22529.650390625,
+ "learning_rate": 5.875906541728766e-05,
+ "loss": 0.422,
+ "step": 105450
+ },
+ {
+ "epoch": 0.544316663313057,
+ "grad_norm": 18307.05859375,
+ "learning_rate": 5.8723097830901264e-05,
+ "loss": 0.4236,
+ "step": 105500
+ },
+ {
+ "epoch": 0.5445746332956697,
+ "grad_norm": 22356.583984375,
+ "learning_rate": 5.8687125588294154e-05,
+ "loss": 0.4213,
+ "step": 105550
+ },
+ {
+ "epoch": 0.5448326032782825,
+ "grad_norm": 21446.732421875,
+ "learning_rate": 5.8651148708667625e-05,
+ "loss": 0.4216,
+ "step": 105600
+ },
+ {
+ "epoch": 0.5450905732608954,
+ "grad_norm": 24014.49609375,
+ "learning_rate": 5.8615167211225416e-05,
+ "loss": 0.4283,
+ "step": 105650
+ },
+ {
+ "epoch": 0.5453485432435082,
+ "grad_norm": 22394.306640625,
+ "learning_rate": 5.8579181115173785e-05,
+ "loss": 0.4242,
+ "step": 105700
+ },
+ {
+ "epoch": 0.545606513226121,
+ "grad_norm": 25348.26171875,
+ "learning_rate": 5.8543190439721405e-05,
+ "loss": 0.4234,
+ "step": 105750
+ },
+ {
+ "epoch": 0.5458644832087338,
+ "grad_norm": 22638.720703125,
+ "learning_rate": 5.850719520407939e-05,
+ "loss": 0.4269,
+ "step": 105800
+ },
+ {
+ "epoch": 0.5461224531913467,
+ "grad_norm": 22702.841796875,
+ "learning_rate": 5.847119542746131e-05,
+ "loss": 0.4201,
+ "step": 105850
+ },
+ {
+ "epoch": 0.5463804231739595,
+ "grad_norm": 22299.849609375,
+ "learning_rate": 5.843519112908315e-05,
+ "loss": 0.4243,
+ "step": 105900
+ },
+ {
+ "epoch": 0.5466383931565723,
+ "grad_norm": 21965.283203125,
+ "learning_rate": 5.8399182328163304e-05,
+ "loss": 0.4209,
+ "step": 105950
+ },
+ {
+ "epoch": 0.5468963631391851,
+ "grad_norm": 22101.755859375,
+ "learning_rate": 5.836316904392256e-05,
+ "loss": 0.4254,
+ "step": 106000
+ },
+ {
+ "epoch": 0.547154333121798,
+ "grad_norm": 22735.970703125,
+ "learning_rate": 5.8327151295584126e-05,
+ "loss": 0.4251,
+ "step": 106050
+ },
+ {
+ "epoch": 0.5474123031044108,
+ "grad_norm": 24287.58203125,
+ "learning_rate": 5.829112910237359e-05,
+ "loss": 0.427,
+ "step": 106100
+ },
+ {
+ "epoch": 0.5476702730870235,
+ "grad_norm": 22509.02734375,
+ "learning_rate": 5.825510248351889e-05,
+ "loss": 0.4209,
+ "step": 106150
+ },
+ {
+ "epoch": 0.5479282430696364,
+ "grad_norm": 22325.32421875,
+ "learning_rate": 5.821907145825032e-05,
+ "loss": 0.4276,
+ "step": 106200
+ },
+ {
+ "epoch": 0.5481862130522492,
+ "grad_norm": 21362.255859375,
+ "learning_rate": 5.8183036045800556e-05,
+ "loss": 0.4273,
+ "step": 106250
+ },
+ {
+ "epoch": 0.5484441830348621,
+ "grad_norm": 22934.61328125,
+ "learning_rate": 5.814699626540461e-05,
+ "loss": 0.4318,
+ "step": 106300
+ },
+ {
+ "epoch": 0.5487021530174749,
+ "grad_norm": 23663.65625,
+ "learning_rate": 5.8110952136299814e-05,
+ "loss": 0.4246,
+ "step": 106350
+ },
+ {
+ "epoch": 0.5489601230000877,
+ "grad_norm": 20743.84765625,
+ "learning_rate": 5.807490367772584e-05,
+ "loss": 0.4289,
+ "step": 106400
+ },
+ {
+ "epoch": 0.5492180929827005,
+ "grad_norm": 20859.244140625,
+ "learning_rate": 5.8038850908924636e-05,
+ "loss": 0.4255,
+ "step": 106450
+ },
+ {
+ "epoch": 0.5494760629653134,
+ "grad_norm": 21824.990234375,
+ "learning_rate": 5.800279384914047e-05,
+ "loss": 0.4311,
+ "step": 106500
+ },
+ {
+ "epoch": 0.5497340329479262,
+ "grad_norm": 19514.681640625,
+ "learning_rate": 5.7966732517619926e-05,
+ "loss": 0.4311,
+ "step": 106550
+ },
+ {
+ "epoch": 0.549992002930539,
+ "grad_norm": 24263.765625,
+ "learning_rate": 5.7930666933611835e-05,
+ "loss": 0.4257,
+ "step": 106600
+ },
+ {
+ "epoch": 0.5502499729131518,
+ "grad_norm": 23152.279296875,
+ "learning_rate": 5.789459711636729e-05,
+ "loss": 0.4226,
+ "step": 106650
+ },
+ {
+ "epoch": 0.5505079428957647,
+ "grad_norm": 21756.8671875,
+ "learning_rate": 5.785852308513967e-05,
+ "loss": 0.4266,
+ "step": 106700
+ },
+ {
+ "epoch": 0.5507659128783775,
+ "grad_norm": 20913.3125,
+ "learning_rate": 5.78224448591846e-05,
+ "loss": 0.4228,
+ "step": 106750
+ },
+ {
+ "epoch": 0.5510238828609902,
+ "grad_norm": 24674.92578125,
+ "learning_rate": 5.778636245775996e-05,
+ "loss": 0.4246,
+ "step": 106800
+ },
+ {
+ "epoch": 0.5512818528436031,
+ "grad_norm": 24229.4296875,
+ "learning_rate": 5.775027590012579e-05,
+ "loss": 0.4244,
+ "step": 106850
+ },
+ {
+ "epoch": 0.5515398228262159,
+ "grad_norm": 21722.048828125,
+ "learning_rate": 5.771418520554443e-05,
+ "loss": 0.4264,
+ "step": 106900
+ },
+ {
+ "epoch": 0.5517977928088288,
+ "grad_norm": 22060.224609375,
+ "learning_rate": 5.7678090393280384e-05,
+ "loss": 0.4268,
+ "step": 106950
+ },
+ {
+ "epoch": 0.5520557627914416,
+ "grad_norm": 25690.306640625,
+ "learning_rate": 5.7641991482600366e-05,
+ "loss": 0.4298,
+ "step": 107000
+ },
+ {
+ "epoch": 0.5523137327740544,
+ "grad_norm": 24629.115234375,
+ "learning_rate": 5.7605888492773266e-05,
+ "loss": 0.4223,
+ "step": 107050
+ },
+ {
+ "epoch": 0.5525717027566672,
+ "grad_norm": 23552.78515625,
+ "learning_rate": 5.756978144307018e-05,
+ "loss": 0.4246,
+ "step": 107100
+ },
+ {
+ "epoch": 0.5528296727392801,
+ "grad_norm": 21611.703125,
+ "learning_rate": 5.753367035276431e-05,
+ "loss": 0.4173,
+ "step": 107150
+ },
+ {
+ "epoch": 0.5530876427218929,
+ "grad_norm": 24158.64453125,
+ "learning_rate": 5.749755524113111e-05,
+ "loss": 0.4211,
+ "step": 107200
+ },
+ {
+ "epoch": 0.5533456127045057,
+ "grad_norm": 23446.94140625,
+ "learning_rate": 5.746143612744811e-05,
+ "loss": 0.4262,
+ "step": 107250
+ },
+ {
+ "epoch": 0.5536035826871185,
+ "grad_norm": 21608.703125,
+ "learning_rate": 5.742531303099498e-05,
+ "loss": 0.424,
+ "step": 107300
+ },
+ {
+ "epoch": 0.5538615526697314,
+ "grad_norm": 25070.78125,
+ "learning_rate": 5.738918597105353e-05,
+ "loss": 0.4219,
+ "step": 107350
+ },
+ {
+ "epoch": 0.5541195226523442,
+ "grad_norm": 21161.5234375,
+ "learning_rate": 5.735305496690769e-05,
+ "loss": 0.4293,
+ "step": 107400
+ },
+ {
+ "epoch": 0.554377492634957,
+ "grad_norm": 23108.521484375,
+ "learning_rate": 5.7316920037843516e-05,
+ "loss": 0.427,
+ "step": 107450
+ },
+ {
+ "epoch": 0.5546354626175698,
+ "grad_norm": 22233.87890625,
+ "learning_rate": 5.728078120314909e-05,
+ "loss": 0.4204,
+ "step": 107500
+ },
+ {
+ "epoch": 0.5548934326001826,
+ "grad_norm": 24522.310546875,
+ "learning_rate": 5.724463848211464e-05,
+ "loss": 0.4257,
+ "step": 107550
+ },
+ {
+ "epoch": 0.5551514025827955,
+ "grad_norm": 22916.892578125,
+ "learning_rate": 5.720849189403244e-05,
+ "loss": 0.4251,
+ "step": 107600
+ },
+ {
+ "epoch": 0.5554093725654083,
+ "grad_norm": 20069.236328125,
+ "learning_rate": 5.7172341458196876e-05,
+ "loss": 0.424,
+ "step": 107650
+ },
+ {
+ "epoch": 0.5556673425480211,
+ "grad_norm": 25173.86328125,
+ "learning_rate": 5.713618719390432e-05,
+ "loss": 0.4294,
+ "step": 107700
+ },
+ {
+ "epoch": 0.5559253125306339,
+ "grad_norm": 21957.373046875,
+ "learning_rate": 5.710002912045323e-05,
+ "loss": 0.4239,
+ "step": 107750
+ },
+ {
+ "epoch": 0.5561832825132468,
+ "grad_norm": 20540.82421875,
+ "learning_rate": 5.706386725714407e-05,
+ "loss": 0.4259,
+ "step": 107800
+ },
+ {
+ "epoch": 0.5564412524958596,
+ "grad_norm": 22470.4921875,
+ "learning_rate": 5.702770162327936e-05,
+ "loss": 0.4273,
+ "step": 107850
+ },
+ {
+ "epoch": 0.5566992224784724,
+ "grad_norm": 21721.197265625,
+ "learning_rate": 5.69915322381636e-05,
+ "loss": 0.4233,
+ "step": 107900
+ },
+ {
+ "epoch": 0.5569571924610852,
+ "grad_norm": 21666.955078125,
+ "learning_rate": 5.6955359121103324e-05,
+ "loss": 0.4283,
+ "step": 107950
+ },
+ {
+ "epoch": 0.5572151624436981,
+ "grad_norm": 20970.9296875,
+ "learning_rate": 5.6919182291407014e-05,
+ "loss": 0.4275,
+ "step": 108000
+ },
+ {
+ "epoch": 0.5574731324263109,
+ "grad_norm": 22353.306640625,
+ "learning_rate": 5.688300176838518e-05,
+ "loss": 0.4244,
+ "step": 108050
+ },
+ {
+ "epoch": 0.5577311024089237,
+ "grad_norm": 22607.357421875,
+ "learning_rate": 5.68468175713503e-05,
+ "loss": 0.4252,
+ "step": 108100
+ },
+ {
+ "epoch": 0.5579890723915365,
+ "grad_norm": 21949.1015625,
+ "learning_rate": 5.681062971961677e-05,
+ "loss": 0.4194,
+ "step": 108150
+ },
+ {
+ "epoch": 0.5582470423741493,
+ "grad_norm": 23135.21875,
+ "learning_rate": 5.677443823250099e-05,
+ "loss": 0.425,
+ "step": 108200
+ },
+ {
+ "epoch": 0.5585050123567622,
+ "grad_norm": 19050.34765625,
+ "learning_rate": 5.673824312932123e-05,
+ "loss": 0.422,
+ "step": 108250
+ },
+ {
+ "epoch": 0.5587629823393749,
+ "grad_norm": 22969.15625,
+ "learning_rate": 5.67020444293978e-05,
+ "loss": 0.4253,
+ "step": 108300
+ },
+ {
+ "epoch": 0.5590209523219878,
+ "grad_norm": 22808.203125,
+ "learning_rate": 5.666584215205282e-05,
+ "loss": 0.4261,
+ "step": 108350
+ },
+ {
+ "epoch": 0.5592789223046006,
+ "grad_norm": 23061.126953125,
+ "learning_rate": 5.662963631661038e-05,
+ "loss": 0.4248,
+ "step": 108400
+ },
+ {
+ "epoch": 0.5595368922872135,
+ "grad_norm": 24134.693359375,
+ "learning_rate": 5.659342694239642e-05,
+ "loss": 0.4273,
+ "step": 108450
+ },
+ {
+ "epoch": 0.5597948622698263,
+ "grad_norm": 23659.2578125,
+ "learning_rate": 5.655721404873886e-05,
+ "loss": 0.427,
+ "step": 108500
+ },
+ {
+ "epoch": 0.5600528322524391,
+ "grad_norm": 20205.1953125,
+ "learning_rate": 5.652099765496741e-05,
+ "loss": 0.4257,
+ "step": 108550
+ },
+ {
+ "epoch": 0.5603108022350519,
+ "grad_norm": 21324.837890625,
+ "learning_rate": 5.6484777780413686e-05,
+ "loss": 0.4248,
+ "step": 108600
+ },
+ {
+ "epoch": 0.5605687722176648,
+ "grad_norm": 21779.849609375,
+ "learning_rate": 5.644855444441114e-05,
+ "loss": 0.4259,
+ "step": 108650
+ },
+ {
+ "epoch": 0.5608267422002776,
+ "grad_norm": 20502.0859375,
+ "learning_rate": 5.641232766629512e-05,
+ "loss": 0.4281,
+ "step": 108700
+ },
+ {
+ "epoch": 0.5610847121828904,
+ "grad_norm": 23600.5859375,
+ "learning_rate": 5.637609746540276e-05,
+ "loss": 0.4183,
+ "step": 108750
+ },
+ {
+ "epoch": 0.5613426821655032,
+ "grad_norm": 22977.41015625,
+ "learning_rate": 5.633986386107302e-05,
+ "loss": 0.4219,
+ "step": 108800
+ },
+ {
+ "epoch": 0.561600652148116,
+ "grad_norm": 23411.263671875,
+ "learning_rate": 5.630362687264672e-05,
+ "loss": 0.4268,
+ "step": 108850
+ },
+ {
+ "epoch": 0.5618586221307289,
+ "grad_norm": 20194.060546875,
+ "learning_rate": 5.6267386519466446e-05,
+ "loss": 0.4175,
+ "step": 108900
+ },
+ {
+ "epoch": 0.5621165921133416,
+ "grad_norm": 19387.88671875,
+ "learning_rate": 5.623114282087664e-05,
+ "loss": 0.4274,
+ "step": 108950
+ },
+ {
+ "epoch": 0.5623745620959545,
+ "grad_norm": 23158.28125,
+ "learning_rate": 5.619489579622343e-05,
+ "loss": 0.4222,
+ "step": 109000
+ },
+ {
+ "epoch": 0.5626325320785673,
+ "grad_norm": 23551.431640625,
+ "learning_rate": 5.6158645464854817e-05,
+ "loss": 0.428,
+ "step": 109050
+ },
+ {
+ "epoch": 0.5628905020611802,
+ "grad_norm": 23904.896484375,
+ "learning_rate": 5.6122391846120495e-05,
+ "loss": 0.4252,
+ "step": 109100
+ },
+ {
+ "epoch": 0.563148472043793,
+ "grad_norm": 21354.61328125,
+ "learning_rate": 5.608613495937197e-05,
+ "loss": 0.4202,
+ "step": 109150
+ },
+ {
+ "epoch": 0.5634064420264058,
+ "grad_norm": 23561.978515625,
+ "learning_rate": 5.6049874823962456e-05,
+ "loss": 0.4301,
+ "step": 109200
+ },
+ {
+ "epoch": 0.5636644120090186,
+ "grad_norm": 20979.53515625,
+ "learning_rate": 5.601361145924692e-05,
+ "loss": 0.4204,
+ "step": 109250
+ },
+ {
+ "epoch": 0.5639223819916315,
+ "grad_norm": 24039.125,
+ "learning_rate": 5.5977344884582e-05,
+ "loss": 0.4284,
+ "step": 109300
+ },
+ {
+ "epoch": 0.5641803519742443,
+ "grad_norm": 22242.35546875,
+ "learning_rate": 5.594107511932615e-05,
+ "loss": 0.4248,
+ "step": 109350
+ },
+ {
+ "epoch": 0.5644383219568571,
+ "grad_norm": 20016.1875,
+ "learning_rate": 5.5904802182839434e-05,
+ "loss": 0.4222,
+ "step": 109400
+ },
+ {
+ "epoch": 0.5646962919394699,
+ "grad_norm": 22243.0703125,
+ "learning_rate": 5.5868526094483666e-05,
+ "loss": 0.4276,
+ "step": 109450
+ },
+ {
+ "epoch": 0.5649542619220828,
+ "grad_norm": 23286.38671875,
+ "learning_rate": 5.58322468736223e-05,
+ "loss": 0.4208,
+ "step": 109500
+ },
+ {
+ "epoch": 0.5652122319046956,
+ "grad_norm": 21801.802734375,
+ "learning_rate": 5.579596453962047e-05,
+ "loss": 0.4275,
+ "step": 109550
+ },
+ {
+ "epoch": 0.5654702018873083,
+ "grad_norm": 23282.025390625,
+ "learning_rate": 5.575967911184502e-05,
+ "loss": 0.4255,
+ "step": 109600
+ },
+ {
+ "epoch": 0.5657281718699212,
+ "grad_norm": 25253.943359375,
+ "learning_rate": 5.572339060966439e-05,
+ "loss": 0.4239,
+ "step": 109650
+ },
+ {
+ "epoch": 0.565986141852534,
+ "grad_norm": 22364.595703125,
+ "learning_rate": 5.5687099052448675e-05,
+ "loss": 0.4255,
+ "step": 109700
+ },
+ {
+ "epoch": 0.5662441118351469,
+ "grad_norm": 23305.46484375,
+ "learning_rate": 5.565080445956961e-05,
+ "loss": 0.4254,
+ "step": 109750
+ },
+ {
+ "epoch": 0.5665020818177597,
+ "grad_norm": 20225.2421875,
+ "learning_rate": 5.561450685040054e-05,
+ "loss": 0.4239,
+ "step": 109800
+ },
+ {
+ "epoch": 0.5667600518003725,
+ "grad_norm": 20221.8203125,
+ "learning_rate": 5.557820624431645e-05,
+ "loss": 0.4171,
+ "step": 109850
+ },
+ {
+ "epoch": 0.5670180217829853,
+ "grad_norm": 19833.607421875,
+ "learning_rate": 5.554190266069387e-05,
+ "loss": 0.4224,
+ "step": 109900
+ },
+ {
+ "epoch": 0.5672759917655982,
+ "grad_norm": 19884.58203125,
+ "learning_rate": 5.550559611891095e-05,
+ "loss": 0.4196,
+ "step": 109950
+ },
+ {
+ "epoch": 0.567533961748211,
+ "grad_norm": 22072.25390625,
+ "learning_rate": 5.546928663834745e-05,
+ "loss": 0.4196,
+ "step": 110000
+ },
+ {
+ "epoch": 0.567533961748211,
+ "eval_loss": 0.4103853106498718,
+ "eval_runtime": 3606.5234,
+ "eval_samples_per_second": 859.864,
+ "eval_steps_per_second": 1.679,
+ "step": 110000
+ },
+ {
+ "epoch": 0.5677919317308238,
+ "grad_norm": 21647.181640625,
+ "learning_rate": 5.543297423838464e-05,
+ "loss": 0.414,
+ "step": 110050
+ },
+ {
+ "epoch": 0.5680499017134366,
+ "grad_norm": 23264.748046875,
+ "learning_rate": 5.5396658938405396e-05,
+ "loss": 0.4192,
+ "step": 110100
+ },
+ {
+ "epoch": 0.5683078716960495,
+ "grad_norm": 21868.10546875,
+ "learning_rate": 5.536034075779409e-05,
+ "loss": 0.4222,
+ "step": 110150
+ },
+ {
+ "epoch": 0.5685658416786623,
+ "grad_norm": 22489.07421875,
+ "learning_rate": 5.53240197159367e-05,
+ "loss": 0.4237,
+ "step": 110200
+ },
+ {
+ "epoch": 0.568823811661275,
+ "grad_norm": 21589.173828125,
+ "learning_rate": 5.5287695832220674e-05,
+ "loss": 0.4218,
+ "step": 110250
+ },
+ {
+ "epoch": 0.5690817816438879,
+ "grad_norm": 23184.103515625,
+ "learning_rate": 5.525136912603501e-05,
+ "loss": 0.4203,
+ "step": 110300
+ },
+ {
+ "epoch": 0.5693397516265007,
+ "grad_norm": 23085.970703125,
+ "learning_rate": 5.521503961677019e-05,
+ "loss": 0.4216,
+ "step": 110350
+ },
+ {
+ "epoch": 0.5695977216091136,
+ "grad_norm": 22217.3671875,
+ "learning_rate": 5.51787073238182e-05,
+ "loss": 0.424,
+ "step": 110400
+ },
+ {
+ "epoch": 0.5698556915917263,
+ "grad_norm": 23515.9375,
+ "learning_rate": 5.514237226657253e-05,
+ "loss": 0.4217,
+ "step": 110450
+ },
+ {
+ "epoch": 0.5701136615743392,
+ "grad_norm": 21375.2734375,
+ "learning_rate": 5.510603446442812e-05,
+ "loss": 0.4175,
+ "step": 110500
+ },
+ {
+ "epoch": 0.570371631556952,
+ "grad_norm": 21658.15625,
+ "learning_rate": 5.506969393678139e-05,
+ "loss": 0.4191,
+ "step": 110550
+ },
+ {
+ "epoch": 0.5706296015395649,
+ "grad_norm": 24653.294921875,
+ "learning_rate": 5.503335070303018e-05,
+ "loss": 0.419,
+ "step": 110600
+ },
+ {
+ "epoch": 0.5708875715221777,
+ "grad_norm": 21722.984375,
+ "learning_rate": 5.4997004782573855e-05,
+ "loss": 0.4237,
+ "step": 110650
+ },
+ {
+ "epoch": 0.5711455415047905,
+ "grad_norm": 20897.595703125,
+ "learning_rate": 5.496065619481312e-05,
+ "loss": 0.4211,
+ "step": 110700
+ },
+ {
+ "epoch": 0.5714035114874033,
+ "grad_norm": 20729.123046875,
+ "learning_rate": 5.4924304959150175e-05,
+ "loss": 0.4228,
+ "step": 110750
+ },
+ {
+ "epoch": 0.5716614814700162,
+ "grad_norm": 22107.888671875,
+ "learning_rate": 5.488795109498861e-05,
+ "loss": 0.4222,
+ "step": 110800
+ },
+ {
+ "epoch": 0.571919451452629,
+ "grad_norm": 24264.587890625,
+ "learning_rate": 5.485159462173337e-05,
+ "loss": 0.4232,
+ "step": 110850
+ },
+ {
+ "epoch": 0.5721774214352418,
+ "grad_norm": 23000.34375,
+ "learning_rate": 5.481523555879089e-05,
+ "loss": 0.4236,
+ "step": 110900
+ },
+ {
+ "epoch": 0.5724353914178546,
+ "grad_norm": 20345.26953125,
+ "learning_rate": 5.4778873925568905e-05,
+ "loss": 0.4254,
+ "step": 110950
+ },
+ {
+ "epoch": 0.5726933614004674,
+ "grad_norm": 25514.09765625,
+ "learning_rate": 5.4742509741476566e-05,
+ "loss": 0.4247,
+ "step": 111000
+ },
+ {
+ "epoch": 0.5729513313830803,
+ "grad_norm": 22510.115234375,
+ "learning_rate": 5.470614302592434e-05,
+ "loss": 0.4271,
+ "step": 111050
+ },
+ {
+ "epoch": 0.573209301365693,
+ "grad_norm": 24683.4921875,
+ "learning_rate": 5.466977379832411e-05,
+ "loss": 0.4207,
+ "step": 111100
+ },
+ {
+ "epoch": 0.5734672713483059,
+ "grad_norm": 22154.2890625,
+ "learning_rate": 5.4633402078089035e-05,
+ "loss": 0.422,
+ "step": 111150
+ },
+ {
+ "epoch": 0.5737252413309187,
+ "grad_norm": 23333.966796875,
+ "learning_rate": 5.459702788463367e-05,
+ "loss": 0.4218,
+ "step": 111200
+ },
+ {
+ "epoch": 0.5739832113135316,
+ "grad_norm": 26566.900390625,
+ "learning_rate": 5.4560651237373814e-05,
+ "loss": 0.4269,
+ "step": 111250
+ },
+ {
+ "epoch": 0.5742411812961444,
+ "grad_norm": 21463.828125,
+ "learning_rate": 5.452427215572666e-05,
+ "loss": 0.4196,
+ "step": 111300
+ },
+ {
+ "epoch": 0.5744991512787572,
+ "grad_norm": 24921.373046875,
+ "learning_rate": 5.448789065911064e-05,
+ "loss": 0.4248,
+ "step": 111350
+ },
+ {
+ "epoch": 0.57475712126137,
+ "grad_norm": 23610.16015625,
+ "learning_rate": 5.445150676694548e-05,
+ "loss": 0.4245,
+ "step": 111400
+ },
+ {
+ "epoch": 0.5750150912439829,
+ "grad_norm": 24598.2109375,
+ "learning_rate": 5.441512049865221e-05,
+ "loss": 0.4199,
+ "step": 111450
+ },
+ {
+ "epoch": 0.5752730612265957,
+ "grad_norm": 24330.02734375,
+ "learning_rate": 5.43787318736531e-05,
+ "loss": 0.423,
+ "step": 111500
+ },
+ {
+ "epoch": 0.5755310312092085,
+ "grad_norm": 23434.587890625,
+ "learning_rate": 5.434234091137171e-05,
+ "loss": 0.4214,
+ "step": 111550
+ },
+ {
+ "epoch": 0.5757890011918213,
+ "grad_norm": 25007.08203125,
+ "learning_rate": 5.430594763123283e-05,
+ "loss": 0.4258,
+ "step": 111600
+ },
+ {
+ "epoch": 0.5760469711744342,
+ "grad_norm": 24568.759765625,
+ "learning_rate": 5.4269552052662486e-05,
+ "loss": 0.4248,
+ "step": 111650
+ },
+ {
+ "epoch": 0.576304941157047,
+ "grad_norm": 22131.74609375,
+ "learning_rate": 5.423315419508792e-05,
+ "loss": 0.418,
+ "step": 111700
+ },
+ {
+ "epoch": 0.5765629111396597,
+ "grad_norm": 22058.443359375,
+ "learning_rate": 5.4196754077937626e-05,
+ "loss": 0.4289,
+ "step": 111750
+ },
+ {
+ "epoch": 0.5768208811222726,
+ "grad_norm": 23790.3203125,
+ "learning_rate": 5.4160351720641276e-05,
+ "loss": 0.4217,
+ "step": 111800
+ },
+ {
+ "epoch": 0.5770788511048854,
+ "grad_norm": 20349.287109375,
+ "learning_rate": 5.412394714262974e-05,
+ "loss": 0.4154,
+ "step": 111850
+ },
+ {
+ "epoch": 0.5773368210874983,
+ "grad_norm": 20262.9296875,
+ "learning_rate": 5.408754036333506e-05,
+ "loss": 0.4214,
+ "step": 111900
+ },
+ {
+ "epoch": 0.5775947910701111,
+ "grad_norm": 21678.17578125,
+ "learning_rate": 5.4051131402190494e-05,
+ "loss": 0.4168,
+ "step": 111950
+ },
+ {
+ "epoch": 0.5778527610527239,
+ "grad_norm": 22649.1875,
+ "learning_rate": 5.4014720278630415e-05,
+ "loss": 0.4206,
+ "step": 112000
+ },
+ {
+ "epoch": 0.5781107310353367,
+ "grad_norm": 22304.1328125,
+ "learning_rate": 5.39783070120904e-05,
+ "loss": 0.4199,
+ "step": 112050
+ },
+ {
+ "epoch": 0.5783687010179496,
+ "grad_norm": 22573.169921875,
+ "learning_rate": 5.394189162200715e-05,
+ "loss": 0.4325,
+ "step": 112100
+ },
+ {
+ "epoch": 0.5786266710005624,
+ "grad_norm": 22942.09765625,
+ "learning_rate": 5.390547412781847e-05,
+ "loss": 0.416,
+ "step": 112150
+ },
+ {
+ "epoch": 0.5788846409831752,
+ "grad_norm": 20210.18359375,
+ "learning_rate": 5.386905454896333e-05,
+ "loss": 0.4274,
+ "step": 112200
+ },
+ {
+ "epoch": 0.579142610965788,
+ "grad_norm": 22916.09375,
+ "learning_rate": 5.38326329048818e-05,
+ "loss": 0.4208,
+ "step": 112250
+ },
+ {
+ "epoch": 0.5794005809484009,
+ "grad_norm": 21563.78125,
+ "learning_rate": 5.379620921501503e-05,
+ "loss": 0.4264,
+ "step": 112300
+ },
+ {
+ "epoch": 0.5796585509310137,
+ "grad_norm": 20984.3671875,
+ "learning_rate": 5.375978349880528e-05,
+ "loss": 0.4229,
+ "step": 112350
+ },
+ {
+ "epoch": 0.5799165209136264,
+ "grad_norm": 22014.6640625,
+ "learning_rate": 5.372335577569592e-05,
+ "loss": 0.4205,
+ "step": 112400
+ },
+ {
+ "epoch": 0.5801744908962393,
+ "grad_norm": 22977.39453125,
+ "learning_rate": 5.3686926065131325e-05,
+ "loss": 0.4248,
+ "step": 112450
+ },
+ {
+ "epoch": 0.5804324608788521,
+ "grad_norm": 22589.521484375,
+ "learning_rate": 5.365049438655702e-05,
+ "loss": 0.4165,
+ "step": 112500
+ },
+ {
+ "epoch": 0.580690430861465,
+ "grad_norm": 24455.625,
+ "learning_rate": 5.3614060759419474e-05,
+ "loss": 0.4224,
+ "step": 112550
+ },
+ {
+ "epoch": 0.5809484008440777,
+ "grad_norm": 24485.833984375,
+ "learning_rate": 5.357762520316628e-05,
+ "loss": 0.4264,
+ "step": 112600
+ },
+ {
+ "epoch": 0.5812063708266906,
+ "grad_norm": 23294.244140625,
+ "learning_rate": 5.354118773724603e-05,
+ "loss": 0.4254,
+ "step": 112650
+ },
+ {
+ "epoch": 0.5814643408093034,
+ "grad_norm": 21813.884765625,
+ "learning_rate": 5.350474838110835e-05,
+ "loss": 0.4226,
+ "step": 112700
+ },
+ {
+ "epoch": 0.5817223107919163,
+ "grad_norm": 23532.0546875,
+ "learning_rate": 5.3468307154203836e-05,
+ "loss": 0.4236,
+ "step": 112750
+ },
+ {
+ "epoch": 0.5819802807745291,
+ "grad_norm": 24739.787109375,
+ "learning_rate": 5.343186407598413e-05,
+ "loss": 0.4276,
+ "step": 112800
+ },
+ {
+ "epoch": 0.5822382507571419,
+ "grad_norm": 23312.783203125,
+ "learning_rate": 5.339541916590184e-05,
+ "loss": 0.4281,
+ "step": 112850
+ },
+ {
+ "epoch": 0.5824962207397547,
+ "grad_norm": 24166.798828125,
+ "learning_rate": 5.335897244341054e-05,
+ "loss": 0.4238,
+ "step": 112900
+ },
+ {
+ "epoch": 0.5827541907223676,
+ "grad_norm": 23690.455078125,
+ "learning_rate": 5.332252392796478e-05,
+ "loss": 0.4181,
+ "step": 112950
+ },
+ {
+ "epoch": 0.5830121607049804,
+ "grad_norm": 23499.16015625,
+ "learning_rate": 5.32860736390201e-05,
+ "loss": 0.4143,
+ "step": 113000
+ },
+ {
+ "epoch": 0.5832701306875931,
+ "grad_norm": 23299.5625,
+ "learning_rate": 5.324962159603294e-05,
+ "loss": 0.4198,
+ "step": 113050
+ },
+ {
+ "epoch": 0.583528100670206,
+ "grad_norm": 22958.423828125,
+ "learning_rate": 5.321316781846071e-05,
+ "loss": 0.421,
+ "step": 113100
+ },
+ {
+ "epoch": 0.5837860706528188,
+ "grad_norm": 20775.119140625,
+ "learning_rate": 5.3176712325761704e-05,
+ "loss": 0.4148,
+ "step": 113150
+ },
+ {
+ "epoch": 0.5840440406354317,
+ "grad_norm": 23139.953125,
+ "learning_rate": 5.3140255137395155e-05,
+ "loss": 0.422,
+ "step": 113200
+ },
+ {
+ "epoch": 0.5843020106180444,
+ "grad_norm": 19829.94140625,
+ "learning_rate": 5.310379627282125e-05,
+ "loss": 0.4248,
+ "step": 113250
+ },
+ {
+ "epoch": 0.5845599806006573,
+ "grad_norm": 20085.572265625,
+ "learning_rate": 5.306733575150099e-05,
+ "loss": 0.4183,
+ "step": 113300
+ },
+ {
+ "epoch": 0.5848179505832701,
+ "grad_norm": 23206.27734375,
+ "learning_rate": 5.303087359289629e-05,
+ "loss": 0.423,
+ "step": 113350
+ },
+ {
+ "epoch": 0.585075920565883,
+ "grad_norm": 25039.34765625,
+ "learning_rate": 5.299440981646996e-05,
+ "loss": 0.4232,
+ "step": 113400
+ },
+ {
+ "epoch": 0.5853338905484958,
+ "grad_norm": 21276.865234375,
+ "learning_rate": 5.2957944441685646e-05,
+ "loss": 0.4205,
+ "step": 113450
+ },
+ {
+ "epoch": 0.5855918605311086,
+ "grad_norm": 22706.712890625,
+ "learning_rate": 5.292147748800788e-05,
+ "loss": 0.4225,
+ "step": 113500
+ },
+ {
+ "epoch": 0.5858498305137214,
+ "grad_norm": 18046.537109375,
+ "learning_rate": 5.2885008974902004e-05,
+ "loss": 0.4195,
+ "step": 113550
+ },
+ {
+ "epoch": 0.5861078004963343,
+ "grad_norm": 22363.5625,
+ "learning_rate": 5.28485389218342e-05,
+ "loss": 0.4149,
+ "step": 113600
+ },
+ {
+ "epoch": 0.5863657704789471,
+ "grad_norm": 24409.609375,
+ "learning_rate": 5.2812067348271466e-05,
+ "loss": 0.4224,
+ "step": 113650
+ },
+ {
+ "epoch": 0.5866237404615599,
+ "grad_norm": 23921.68359375,
+ "learning_rate": 5.277559427368164e-05,
+ "loss": 0.4274,
+ "step": 113700
+ },
+ {
+ "epoch": 0.5868817104441727,
+ "grad_norm": 23887.84765625,
+ "learning_rate": 5.273911971753335e-05,
+ "loss": 0.4185,
+ "step": 113750
+ },
+ {
+ "epoch": 0.5871396804267855,
+ "grad_norm": 23169.423828125,
+ "learning_rate": 5.270264369929597e-05,
+ "loss": 0.4218,
+ "step": 113800
+ },
+ {
+ "epoch": 0.5873976504093984,
+ "grad_norm": 23339.57421875,
+ "learning_rate": 5.266616623843972e-05,
+ "loss": 0.4211,
+ "step": 113850
+ },
+ {
+ "epoch": 0.5876556203920111,
+ "grad_norm": 22072.59765625,
+ "learning_rate": 5.2629687354435576e-05,
+ "loss": 0.4191,
+ "step": 113900
+ },
+ {
+ "epoch": 0.587913590374624,
+ "grad_norm": 24308.357421875,
+ "learning_rate": 5.259320706675523e-05,
+ "loss": 0.4168,
+ "step": 113950
+ },
+ {
+ "epoch": 0.5881715603572368,
+ "grad_norm": 20896.5703125,
+ "learning_rate": 5.255672539487119e-05,
+ "loss": 0.4201,
+ "step": 114000
+ },
+ {
+ "epoch": 0.5884295303398497,
+ "grad_norm": 20070.814453125,
+ "learning_rate": 5.252024235825661e-05,
+ "loss": 0.4216,
+ "step": 114050
+ },
+ {
+ "epoch": 0.5886875003224625,
+ "grad_norm": 24864.869140625,
+ "learning_rate": 5.2483757976385486e-05,
+ "loss": 0.4269,
+ "step": 114100
+ },
+ {
+ "epoch": 0.5889454703050753,
+ "grad_norm": 24734.0234375,
+ "learning_rate": 5.2447272268732436e-05,
+ "loss": 0.4196,
+ "step": 114150
+ },
+ {
+ "epoch": 0.5892034402876881,
+ "grad_norm": 22383.212890625,
+ "learning_rate": 5.2410785254772856e-05,
+ "loss": 0.4171,
+ "step": 114200
+ },
+ {
+ "epoch": 0.589461410270301,
+ "grad_norm": 22587.44921875,
+ "learning_rate": 5.237429695398276e-05,
+ "loss": 0.4232,
+ "step": 114250
+ },
+ {
+ "epoch": 0.5897193802529138,
+ "grad_norm": 23357.263671875,
+ "learning_rate": 5.2337807385838935e-05,
+ "loss": 0.4241,
+ "step": 114300
+ },
+ {
+ "epoch": 0.5899773502355266,
+ "grad_norm": 24632.125,
+ "learning_rate": 5.23013165698188e-05,
+ "loss": 0.4154,
+ "step": 114350
+ },
+ {
+ "epoch": 0.5902353202181394,
+ "grad_norm": 23191.818359375,
+ "learning_rate": 5.226482452540045e-05,
+ "loss": 0.424,
+ "step": 114400
+ },
+ {
+ "epoch": 0.5904932902007523,
+ "grad_norm": 23649.560546875,
+ "learning_rate": 5.2228331272062626e-05,
+ "loss": 0.427,
+ "step": 114450
+ },
+ {
+ "epoch": 0.5907512601833651,
+ "grad_norm": 23992.169921875,
+ "learning_rate": 5.21918368292847e-05,
+ "loss": 0.4267,
+ "step": 114500
+ },
+ {
+ "epoch": 0.5910092301659778,
+ "grad_norm": 21792.041015625,
+ "learning_rate": 5.215534121654673e-05,
+ "loss": 0.4272,
+ "step": 114550
+ },
+ {
+ "epoch": 0.5912672001485907,
+ "grad_norm": 25516.345703125,
+ "learning_rate": 5.211884445332935e-05,
+ "loss": 0.4207,
+ "step": 114600
+ },
+ {
+ "epoch": 0.5915251701312035,
+ "grad_norm": 22557.25390625,
+ "learning_rate": 5.208234655911384e-05,
+ "loss": 0.4228,
+ "step": 114650
+ },
+ {
+ "epoch": 0.5917831401138164,
+ "grad_norm": 24185.09765625,
+ "learning_rate": 5.2045847553382045e-05,
+ "loss": 0.4226,
+ "step": 114700
+ },
+ {
+ "epoch": 0.5920411100964291,
+ "grad_norm": 20565.134765625,
+ "learning_rate": 5.200934745561643e-05,
+ "loss": 0.4274,
+ "step": 114750
+ },
+ {
+ "epoch": 0.592299080079042,
+ "grad_norm": 24019.0,
+ "learning_rate": 5.197284628530007e-05,
+ "loss": 0.4234,
+ "step": 114800
+ },
+ {
+ "epoch": 0.5925570500616548,
+ "grad_norm": 26129.01171875,
+ "learning_rate": 5.193634406191658e-05,
+ "loss": 0.418,
+ "step": 114850
+ },
+ {
+ "epoch": 0.5928150200442677,
+ "grad_norm": 25187.611328125,
+ "learning_rate": 5.18998408049501e-05,
+ "loss": 0.4213,
+ "step": 114900
+ },
+ {
+ "epoch": 0.5930729900268805,
+ "grad_norm": 20145.14453125,
+ "learning_rate": 5.186333653388539e-05,
+ "loss": 0.418,
+ "step": 114950
+ },
+ {
+ "epoch": 0.5933309600094933,
+ "grad_norm": 22472.322265625,
+ "learning_rate": 5.182683126820773e-05,
+ "loss": 0.4209,
+ "step": 115000
+ },
+ {
+ "epoch": 0.5933309600094933,
+ "eval_loss": 0.4084908068180084,
+ "eval_runtime": 3582.6916,
+ "eval_samples_per_second": 865.584,
+ "eval_steps_per_second": 1.691,
+ "step": 115000
+ },
+ {
+ "epoch": 0.5935889299921061,
+ "grad_norm": 22404.052734375,
+ "learning_rate": 5.179032502740291e-05,
+ "loss": 0.4147,
+ "step": 115050
+ },
+ {
+ "epoch": 0.593846899974719,
+ "grad_norm": 21242.705078125,
+ "learning_rate": 5.175381783095725e-05,
+ "loss": 0.4237,
+ "step": 115100
+ },
+ {
+ "epoch": 0.5941048699573318,
+ "grad_norm": 22416.06640625,
+ "learning_rate": 5.171730969835758e-05,
+ "loss": 0.4185,
+ "step": 115150
+ },
+ {
+ "epoch": 0.5943628399399445,
+ "grad_norm": 22231.525390625,
+ "learning_rate": 5.1680800649091276e-05,
+ "loss": 0.4227,
+ "step": 115200
+ },
+ {
+ "epoch": 0.5946208099225574,
+ "grad_norm": 22431.462890625,
+ "learning_rate": 5.164429070264613e-05,
+ "loss": 0.4225,
+ "step": 115250
+ },
+ {
+ "epoch": 0.5948787799051702,
+ "grad_norm": 26008.57421875,
+ "learning_rate": 5.160777987851044e-05,
+ "loss": 0.4253,
+ "step": 115300
+ },
+ {
+ "epoch": 0.5951367498877831,
+ "grad_norm": 22555.501953125,
+ "learning_rate": 5.157126819617297e-05,
+ "loss": 0.4181,
+ "step": 115350
+ },
+ {
+ "epoch": 0.5953947198703958,
+ "grad_norm": 25113.587890625,
+ "learning_rate": 5.153475567512298e-05,
+ "loss": 0.4261,
+ "step": 115400
+ },
+ {
+ "epoch": 0.5956526898530087,
+ "grad_norm": 22877.908203125,
+ "learning_rate": 5.149824233485013e-05,
+ "loss": 0.4177,
+ "step": 115450
+ },
+ {
+ "epoch": 0.5959106598356215,
+ "grad_norm": 22468.34375,
+ "learning_rate": 5.1461728194844526e-05,
+ "loss": 0.4223,
+ "step": 115500
+ },
+ {
+ "epoch": 0.5961686298182344,
+ "grad_norm": 24525.326171875,
+ "learning_rate": 5.142521327459672e-05,
+ "loss": 0.4159,
+ "step": 115550
+ },
+ {
+ "epoch": 0.5964265998008472,
+ "grad_norm": 23334.296875,
+ "learning_rate": 5.1388697593597643e-05,
+ "loss": 0.4206,
+ "step": 115600
+ },
+ {
+ "epoch": 0.59668456978346,
+ "grad_norm": 21743.333984375,
+ "learning_rate": 5.1352181171338706e-05,
+ "loss": 0.4191,
+ "step": 115650
+ },
+ {
+ "epoch": 0.5969425397660728,
+ "grad_norm": 26287.66796875,
+ "learning_rate": 5.131566402731165e-05,
+ "loss": 0.4147,
+ "step": 115700
+ },
+ {
+ "epoch": 0.5972005097486857,
+ "grad_norm": 20856.890625,
+ "learning_rate": 5.12791461810086e-05,
+ "loss": 0.4248,
+ "step": 115750
+ },
+ {
+ "epoch": 0.5974584797312985,
+ "grad_norm": 22821.73046875,
+ "learning_rate": 5.124262765192208e-05,
+ "loss": 0.4239,
+ "step": 115800
+ },
+ {
+ "epoch": 0.5977164497139112,
+ "grad_norm": 24805.427734375,
+ "learning_rate": 5.1206108459545e-05,
+ "loss": 0.4172,
+ "step": 115850
+ },
+ {
+ "epoch": 0.5979744196965241,
+ "grad_norm": 25195.064453125,
+ "learning_rate": 5.116958862337057e-05,
+ "loss": 0.4242,
+ "step": 115900
+ },
+ {
+ "epoch": 0.5982323896791369,
+ "grad_norm": 22029.236328125,
+ "learning_rate": 5.1133068162892383e-05,
+ "loss": 0.4217,
+ "step": 115950
+ },
+ {
+ "epoch": 0.5984903596617498,
+ "grad_norm": 23296.77734375,
+ "learning_rate": 5.109654709760434e-05,
+ "loss": 0.4223,
+ "step": 116000
+ },
+ {
+ "epoch": 0.5987483296443625,
+ "grad_norm": 23822.447265625,
+ "learning_rate": 5.106002544700069e-05,
+ "loss": 0.4235,
+ "step": 116050
+ },
+ {
+ "epoch": 0.5990062996269754,
+ "grad_norm": 21188.46875,
+ "learning_rate": 5.1023503230576e-05,
+ "loss": 0.4275,
+ "step": 116100
+ },
+ {
+ "epoch": 0.5992642696095882,
+ "grad_norm": 24459.021484375,
+ "learning_rate": 5.0986980467825096e-05,
+ "loss": 0.4217,
+ "step": 116150
+ },
+ {
+ "epoch": 0.5995222395922011,
+ "grad_norm": 22304.396484375,
+ "learning_rate": 5.095045717824313e-05,
+ "loss": 0.42,
+ "step": 116200
+ },
+ {
+ "epoch": 0.5997802095748139,
+ "grad_norm": 20124.943359375,
+ "learning_rate": 5.0913933381325516e-05,
+ "loss": 0.4149,
+ "step": 116250
+ },
+ {
+ "epoch": 0.6000381795574267,
+ "grad_norm": 22610.3046875,
+ "learning_rate": 5.087740909656798e-05,
+ "loss": 0.4164,
+ "step": 116300
+ },
+ {
+ "epoch": 0.6002961495400395,
+ "grad_norm": 22058.974609375,
+ "learning_rate": 5.084088434346645e-05,
+ "loss": 0.4211,
+ "step": 116350
+ },
+ {
+ "epoch": 0.6005541195226524,
+ "grad_norm": 23463.626953125,
+ "learning_rate": 5.0804359141517134e-05,
+ "loss": 0.4182,
+ "step": 116400
+ },
+ {
+ "epoch": 0.6008120895052652,
+ "grad_norm": 25045.67578125,
+ "learning_rate": 5.076783351021648e-05,
+ "loss": 0.4202,
+ "step": 116450
+ },
+ {
+ "epoch": 0.601070059487878,
+ "grad_norm": 22583.076171875,
+ "learning_rate": 5.0731307469061184e-05,
+ "loss": 0.4177,
+ "step": 116500
+ },
+ {
+ "epoch": 0.6013280294704908,
+ "grad_norm": 26350.400390625,
+ "learning_rate": 5.069478103754811e-05,
+ "loss": 0.4193,
+ "step": 116550
+ },
+ {
+ "epoch": 0.6015859994531036,
+ "grad_norm": 21430.255859375,
+ "learning_rate": 5.0658254235174385e-05,
+ "loss": 0.422,
+ "step": 116600
+ },
+ {
+ "epoch": 0.6018439694357165,
+ "grad_norm": 21467.482421875,
+ "learning_rate": 5.0621727081437275e-05,
+ "loss": 0.4157,
+ "step": 116650
+ },
+ {
+ "epoch": 0.6021019394183292,
+ "grad_norm": 25780.095703125,
+ "learning_rate": 5.05851995958343e-05,
+ "loss": 0.4243,
+ "step": 116700
+ },
+ {
+ "epoch": 0.6023599094009421,
+ "grad_norm": 21074.52734375,
+ "learning_rate": 5.0548671797863125e-05,
+ "loss": 0.4271,
+ "step": 116750
+ },
+ {
+ "epoch": 0.6026178793835549,
+ "grad_norm": 25752.841796875,
+ "learning_rate": 5.051214370702155e-05,
+ "loss": 0.4209,
+ "step": 116800
+ },
+ {
+ "epoch": 0.6028758493661678,
+ "grad_norm": 23178.93359375,
+ "learning_rate": 5.047561534280758e-05,
+ "loss": 0.4232,
+ "step": 116850
+ },
+ {
+ "epoch": 0.6031338193487806,
+ "grad_norm": 23263.65625,
+ "learning_rate": 5.0439086724719355e-05,
+ "loss": 0.4196,
+ "step": 116900
+ },
+ {
+ "epoch": 0.6033917893313934,
+ "grad_norm": 20372.861328125,
+ "learning_rate": 5.040255787225514e-05,
+ "loss": 0.4194,
+ "step": 116950
+ },
+ {
+ "epoch": 0.6036497593140062,
+ "grad_norm": 23453.59375,
+ "learning_rate": 5.036602880491332e-05,
+ "loss": 0.4156,
+ "step": 117000
+ },
+ {
+ "epoch": 0.6039077292966191,
+ "grad_norm": 24039.392578125,
+ "learning_rate": 5.032949954219243e-05,
+ "loss": 0.4192,
+ "step": 117050
+ },
+ {
+ "epoch": 0.6041656992792319,
+ "grad_norm": 24246.55859375,
+ "learning_rate": 5.0292970103591044e-05,
+ "loss": 0.426,
+ "step": 117100
+ },
+ {
+ "epoch": 0.6044236692618447,
+ "grad_norm": 23403.130859375,
+ "learning_rate": 5.0256440508607904e-05,
+ "loss": 0.4195,
+ "step": 117150
+ },
+ {
+ "epoch": 0.6046816392444575,
+ "grad_norm": 21872.07421875,
+ "learning_rate": 5.021991077674179e-05,
+ "loss": 0.4214,
+ "step": 117200
+ },
+ {
+ "epoch": 0.6049396092270704,
+ "grad_norm": 22344.455078125,
+ "learning_rate": 5.018338092749155e-05,
+ "loss": 0.4205,
+ "step": 117250
+ },
+ {
+ "epoch": 0.6051975792096832,
+ "grad_norm": 22999.099609375,
+ "learning_rate": 5.014685098035612e-05,
+ "loss": 0.4203,
+ "step": 117300
+ },
+ {
+ "epoch": 0.6054555491922959,
+ "grad_norm": 21572.994140625,
+ "learning_rate": 5.011032095483448e-05,
+ "loss": 0.4279,
+ "step": 117350
+ },
+ {
+ "epoch": 0.6057135191749088,
+ "grad_norm": 21263.11328125,
+ "learning_rate": 5.007379087042566e-05,
+ "loss": 0.418,
+ "step": 117400
+ },
+ {
+ "epoch": 0.6059714891575216,
+ "grad_norm": 22789.671875,
+ "learning_rate": 5.00372607466287e-05,
+ "loss": 0.4196,
+ "step": 117450
+ },
+ {
+ "epoch": 0.6062294591401345,
+ "grad_norm": 21276.09765625,
+ "learning_rate": 5.000073060294267e-05,
+ "loss": 0.4125,
+ "step": 117500
+ },
+ {
+ "epoch": 0.6064874291227472,
+ "grad_norm": 22501.169921875,
+ "learning_rate": 4.9964200458866654e-05,
+ "loss": 0.4152,
+ "step": 117550
+ },
+ {
+ "epoch": 0.6067453991053601,
+ "grad_norm": 21645.912109375,
+ "learning_rate": 4.992767033389976e-05,
+ "loss": 0.4253,
+ "step": 117600
+ },
+ {
+ "epoch": 0.6070033690879729,
+ "grad_norm": 21256.7109375,
+ "learning_rate": 4.9891140247541025e-05,
+ "loss": 0.4214,
+ "step": 117650
+ },
+ {
+ "epoch": 0.6072613390705858,
+ "grad_norm": 22883.98046875,
+ "learning_rate": 4.985461021928952e-05,
+ "loss": 0.4238,
+ "step": 117700
+ },
+ {
+ "epoch": 0.6075193090531986,
+ "grad_norm": 21366.412109375,
+ "learning_rate": 4.981808026864426e-05,
+ "loss": 0.4225,
+ "step": 117750
+ },
+ {
+ "epoch": 0.6077772790358114,
+ "grad_norm": 24185.3515625,
+ "learning_rate": 4.978155041510425e-05,
+ "loss": 0.4196,
+ "step": 117800
+ },
+ {
+ "epoch": 0.6080352490184242,
+ "grad_norm": 21638.009765625,
+ "learning_rate": 4.974502067816838e-05,
+ "loss": 0.4221,
+ "step": 117850
+ },
+ {
+ "epoch": 0.608293219001037,
+ "grad_norm": 20867.111328125,
+ "learning_rate": 4.970849107733554e-05,
+ "loss": 0.4225,
+ "step": 117900
+ },
+ {
+ "epoch": 0.6085511889836499,
+ "grad_norm": 21785.69140625,
+ "learning_rate": 4.967196163210451e-05,
+ "loss": 0.4166,
+ "step": 117950
+ },
+ {
+ "epoch": 0.6088091589662626,
+ "grad_norm": 24691.8515625,
+ "learning_rate": 4.963543236197401e-05,
+ "loss": 0.4226,
+ "step": 118000
+ },
+ {
+ "epoch": 0.6090671289488755,
+ "grad_norm": 21214.1484375,
+ "learning_rate": 4.9598903286442675e-05,
+ "loss": 0.418,
+ "step": 118050
+ },
+ {
+ "epoch": 0.6093250989314883,
+ "grad_norm": 22802.849609375,
+ "learning_rate": 4.956237442500898e-05,
+ "loss": 0.4227,
+ "step": 118100
+ },
+ {
+ "epoch": 0.6095830689141012,
+ "grad_norm": 25204.90625,
+ "learning_rate": 4.952584579717135e-05,
+ "loss": 0.4152,
+ "step": 118150
+ },
+ {
+ "epoch": 0.6098410388967139,
+ "grad_norm": 21970.19140625,
+ "learning_rate": 4.9489317422428044e-05,
+ "loss": 0.4197,
+ "step": 118200
+ },
+ {
+ "epoch": 0.6100990088793268,
+ "grad_norm": 22331.013671875,
+ "learning_rate": 4.945278932027723e-05,
+ "loss": 0.4161,
+ "step": 118250
+ },
+ {
+ "epoch": 0.6103569788619396,
+ "grad_norm": 27234.177734375,
+ "learning_rate": 4.941626151021686e-05,
+ "loss": 0.4204,
+ "step": 118300
+ },
+ {
+ "epoch": 0.6106149488445525,
+ "grad_norm": 22253.0078125,
+ "learning_rate": 4.937973401174481e-05,
+ "loss": 0.4202,
+ "step": 118350
+ },
+ {
+ "epoch": 0.6108729188271653,
+ "grad_norm": 20930.27734375,
+ "learning_rate": 4.934320684435871e-05,
+ "loss": 0.4169,
+ "step": 118400
+ },
+ {
+ "epoch": 0.6111308888097781,
+ "grad_norm": 22569.205078125,
+ "learning_rate": 4.930668002755609e-05,
+ "loss": 0.4177,
+ "step": 118450
+ },
+ {
+ "epoch": 0.6113888587923909,
+ "grad_norm": 23197.943359375,
+ "learning_rate": 4.9270153580834256e-05,
+ "loss": 0.414,
+ "step": 118500
+ },
+ {
+ "epoch": 0.6116468287750038,
+ "grad_norm": 21927.1875,
+ "learning_rate": 4.923362752369029e-05,
+ "loss": 0.4203,
+ "step": 118550
+ },
+ {
+ "epoch": 0.6119047987576166,
+ "grad_norm": 23422.181640625,
+ "learning_rate": 4.919710187562112e-05,
+ "loss": 0.4213,
+ "step": 118600
+ },
+ {
+ "epoch": 0.6121627687402293,
+ "grad_norm": 23351.67578125,
+ "learning_rate": 4.9160576656123416e-05,
+ "loss": 0.4213,
+ "step": 118650
+ },
+ {
+ "epoch": 0.6124207387228422,
+ "grad_norm": 21228.416015625,
+ "learning_rate": 4.9124051884693664e-05,
+ "loss": 0.4192,
+ "step": 118700
+ },
+ {
+ "epoch": 0.612678708705455,
+ "grad_norm": 22555.9609375,
+ "learning_rate": 4.908752758082802e-05,
+ "loss": 0.4189,
+ "step": 118750
+ },
+ {
+ "epoch": 0.6129366786880679,
+ "grad_norm": 21010.859375,
+ "learning_rate": 4.905100376402251e-05,
+ "loss": 0.4194,
+ "step": 118800
+ },
+ {
+ "epoch": 0.6131946486706806,
+ "grad_norm": 23468.78515625,
+ "learning_rate": 4.901448045377279e-05,
+ "loss": 0.4151,
+ "step": 118850
+ },
+ {
+ "epoch": 0.6134526186532935,
+ "grad_norm": 23818.189453125,
+ "learning_rate": 4.8977957669574334e-05,
+ "loss": 0.4184,
+ "step": 118900
+ },
+ {
+ "epoch": 0.6137105886359063,
+ "grad_norm": 22162.76171875,
+ "learning_rate": 4.8941435430922294e-05,
+ "loss": 0.4181,
+ "step": 118950
+ },
+ {
+ "epoch": 0.6139685586185192,
+ "grad_norm": 22983.45703125,
+ "learning_rate": 4.8904913757311506e-05,
+ "loss": 0.4196,
+ "step": 119000
+ },
+ {
+ "epoch": 0.614226528601132,
+ "grad_norm": 22748.150390625,
+ "learning_rate": 4.886839266823656e-05,
+ "loss": 0.4195,
+ "step": 119050
+ },
+ {
+ "epoch": 0.6144844985837448,
+ "grad_norm": 23146.306640625,
+ "learning_rate": 4.8831872183191684e-05,
+ "loss": 0.4219,
+ "step": 119100
+ },
+ {
+ "epoch": 0.6147424685663576,
+ "grad_norm": 24951.591796875,
+ "learning_rate": 4.879535232167084e-05,
+ "loss": 0.4165,
+ "step": 119150
+ },
+ {
+ "epoch": 0.6150004385489705,
+ "grad_norm": 24381.689453125,
+ "learning_rate": 4.875883310316758e-05,
+ "loss": 0.4179,
+ "step": 119200
+ },
+ {
+ "epoch": 0.6152584085315833,
+ "grad_norm": 21191.4609375,
+ "learning_rate": 4.872231454717518e-05,
+ "loss": 0.4155,
+ "step": 119250
+ },
+ {
+ "epoch": 0.615516378514196,
+ "grad_norm": 21586.84375,
+ "learning_rate": 4.8685796673186526e-05,
+ "loss": 0.412,
+ "step": 119300
+ },
+ {
+ "epoch": 0.6157743484968089,
+ "grad_norm": 20381.505859375,
+ "learning_rate": 4.864927950069416e-05,
+ "loss": 0.4171,
+ "step": 119350
+ },
+ {
+ "epoch": 0.6160323184794217,
+ "grad_norm": 23258.296875,
+ "learning_rate": 4.861276304919026e-05,
+ "loss": 0.418,
+ "step": 119400
+ },
+ {
+ "epoch": 0.6162902884620346,
+ "grad_norm": 23629.14453125,
+ "learning_rate": 4.857624733816657e-05,
+ "loss": 0.4221,
+ "step": 119450
+ },
+ {
+ "epoch": 0.6165482584446473,
+ "grad_norm": 22892.7734375,
+ "learning_rate": 4.853973238711449e-05,
+ "loss": 0.4278,
+ "step": 119500
+ },
+ {
+ "epoch": 0.6168062284272602,
+ "grad_norm": 21639.669921875,
+ "learning_rate": 4.850321821552497e-05,
+ "loss": 0.4224,
+ "step": 119550
+ },
+ {
+ "epoch": 0.617064198409873,
+ "grad_norm": 21392.951171875,
+ "learning_rate": 4.84667048428886e-05,
+ "loss": 0.4192,
+ "step": 119600
+ },
+ {
+ "epoch": 0.6173221683924859,
+ "grad_norm": 22603.51953125,
+ "learning_rate": 4.843019228869548e-05,
+ "loss": 0.4169,
+ "step": 119650
+ },
+ {
+ "epoch": 0.6175801383750986,
+ "grad_norm": 22470.62109375,
+ "learning_rate": 4.8393680572435324e-05,
+ "loss": 0.4175,
+ "step": 119700
+ },
+ {
+ "epoch": 0.6178381083577115,
+ "grad_norm": 26185.634765625,
+ "learning_rate": 4.835716971359737e-05,
+ "loss": 0.4208,
+ "step": 119750
+ },
+ {
+ "epoch": 0.6180960783403243,
+ "grad_norm": 21508.12109375,
+ "learning_rate": 4.832065973167041e-05,
+ "loss": 0.4194,
+ "step": 119800
+ },
+ {
+ "epoch": 0.6183540483229372,
+ "grad_norm": 20717.205078125,
+ "learning_rate": 4.8284150646142784e-05,
+ "loss": 0.424,
+ "step": 119850
+ },
+ {
+ "epoch": 0.61861201830555,
+ "grad_norm": 20015.30078125,
+ "learning_rate": 4.8247642476502284e-05,
+ "loss": 0.4189,
+ "step": 119900
+ },
+ {
+ "epoch": 0.6188699882881628,
+ "grad_norm": 21596.349609375,
+ "learning_rate": 4.821113524223634e-05,
+ "loss": 0.4218,
+ "step": 119950
+ },
+ {
+ "epoch": 0.6191279582707756,
+ "grad_norm": 22051.921875,
+ "learning_rate": 4.817462896283173e-05,
+ "loss": 0.4184,
+ "step": 120000
+ },
+ {
+ "epoch": 0.6191279582707756,
+ "eval_loss": 0.40621376037597656,
+ "eval_runtime": 3588.5932,
+ "eval_samples_per_second": 864.16,
+ "eval_steps_per_second": 1.688,
+ "step": 120000
+ },
+ {
+ "epoch": 0.6193859282533885,
+ "grad_norm": 22562.478515625,
+ "learning_rate": 4.813812365777486e-05,
+ "loss": 0.4171,
+ "step": 120050
+ },
+ {
+ "epoch": 0.6196438982360013,
+ "grad_norm": 22531.505859375,
+ "learning_rate": 4.81016193465515e-05,
+ "loss": 0.4171,
+ "step": 120100
+ },
+ {
+ "epoch": 0.619901868218614,
+ "grad_norm": 21869.177734375,
+ "learning_rate": 4.8065116048647005e-05,
+ "loss": 0.4184,
+ "step": 120150
+ },
+ {
+ "epoch": 0.6201598382012269,
+ "grad_norm": 23087.56640625,
+ "learning_rate": 4.802861378354607e-05,
+ "loss": 0.4177,
+ "step": 120200
+ },
+ {
+ "epoch": 0.6204178081838397,
+ "grad_norm": 22546.060546875,
+ "learning_rate": 4.7992112570732925e-05,
+ "loss": 0.4213,
+ "step": 120250
+ },
+ {
+ "epoch": 0.6206757781664526,
+ "grad_norm": 22802.8984375,
+ "learning_rate": 4.795561242969122e-05,
+ "loss": 0.4218,
+ "step": 120300
+ },
+ {
+ "epoch": 0.6209337481490653,
+ "grad_norm": 19467.32421875,
+ "learning_rate": 4.791911337990401e-05,
+ "loss": 0.4141,
+ "step": 120350
+ },
+ {
+ "epoch": 0.6211917181316782,
+ "grad_norm": 25076.169921875,
+ "learning_rate": 4.78826154408538e-05,
+ "loss": 0.4178,
+ "step": 120400
+ },
+ {
+ "epoch": 0.621449688114291,
+ "grad_norm": 20815.935546875,
+ "learning_rate": 4.784611863202244e-05,
+ "loss": 0.4217,
+ "step": 120450
+ },
+ {
+ "epoch": 0.6217076580969039,
+ "grad_norm": 21686.271484375,
+ "learning_rate": 4.780962297289126e-05,
+ "loss": 0.4124,
+ "step": 120500
+ },
+ {
+ "epoch": 0.6219656280795167,
+ "grad_norm": 22759.310546875,
+ "learning_rate": 4.777312848294092e-05,
+ "loss": 0.4159,
+ "step": 120550
+ },
+ {
+ "epoch": 0.6222235980621295,
+ "grad_norm": 25325.75390625,
+ "learning_rate": 4.773663518165148e-05,
+ "loss": 0.4176,
+ "step": 120600
+ },
+ {
+ "epoch": 0.6224815680447423,
+ "grad_norm": 23474.958984375,
+ "learning_rate": 4.7700143088502344e-05,
+ "loss": 0.4143,
+ "step": 120650
+ },
+ {
+ "epoch": 0.6227395380273552,
+ "grad_norm": 25355.40625,
+ "learning_rate": 4.766365222297229e-05,
+ "loss": 0.4262,
+ "step": 120700
+ },
+ {
+ "epoch": 0.622997508009968,
+ "grad_norm": 22215.14453125,
+ "learning_rate": 4.762716260453945e-05,
+ "loss": 0.4149,
+ "step": 120750
+ },
+ {
+ "epoch": 0.6232554779925807,
+ "grad_norm": 23521.607421875,
+ "learning_rate": 4.759067425268126e-05,
+ "loss": 0.4223,
+ "step": 120800
+ },
+ {
+ "epoch": 0.6235134479751936,
+ "grad_norm": 24524.02734375,
+ "learning_rate": 4.7554187186874513e-05,
+ "loss": 0.4256,
+ "step": 120850
+ },
+ {
+ "epoch": 0.6237714179578064,
+ "grad_norm": 19954.322265625,
+ "learning_rate": 4.7517701426595266e-05,
+ "loss": 0.4119,
+ "step": 120900
+ },
+ {
+ "epoch": 0.6240293879404193,
+ "grad_norm": 21612.1953125,
+ "learning_rate": 4.748121699131893e-05,
+ "loss": 0.4196,
+ "step": 120950
+ },
+ {
+ "epoch": 0.624287357923032,
+ "grad_norm": 20466.0,
+ "learning_rate": 4.744473390052019e-05,
+ "loss": 0.4181,
+ "step": 121000
+ },
+ {
+ "epoch": 0.6245453279056449,
+ "grad_norm": 19992.173828125,
+ "learning_rate": 4.740825217367304e-05,
+ "loss": 0.4159,
+ "step": 121050
+ },
+ {
+ "epoch": 0.6248032978882577,
+ "grad_norm": 21553.1328125,
+ "learning_rate": 4.737177183025067e-05,
+ "loss": 0.4157,
+ "step": 121100
+ },
+ {
+ "epoch": 0.6250612678708706,
+ "grad_norm": 22242.568359375,
+ "learning_rate": 4.73352928897256e-05,
+ "loss": 0.4153,
+ "step": 121150
+ },
+ {
+ "epoch": 0.6253192378534834,
+ "grad_norm": 23883.212890625,
+ "learning_rate": 4.7298815371569606e-05,
+ "loss": 0.4173,
+ "step": 121200
+ },
+ {
+ "epoch": 0.6255772078360962,
+ "grad_norm": 22386.505859375,
+ "learning_rate": 4.7262339295253645e-05,
+ "loss": 0.4178,
+ "step": 121250
+ },
+ {
+ "epoch": 0.625835177818709,
+ "grad_norm": 22051.859375,
+ "learning_rate": 4.722586468024797e-05,
+ "loss": 0.4111,
+ "step": 121300
+ },
+ {
+ "epoch": 0.6260931478013219,
+ "grad_norm": 21374.4765625,
+ "learning_rate": 4.7189391546021996e-05,
+ "loss": 0.418,
+ "step": 121350
+ },
+ {
+ "epoch": 0.6263511177839347,
+ "grad_norm": 22240.453125,
+ "learning_rate": 4.7152919912044406e-05,
+ "loss": 0.4196,
+ "step": 121400
+ },
+ {
+ "epoch": 0.6266090877665474,
+ "grad_norm": 26278.798828125,
+ "learning_rate": 4.711644979778302e-05,
+ "loss": 0.4165,
+ "step": 121450
+ },
+ {
+ "epoch": 0.6268670577491603,
+ "grad_norm": 22151.77734375,
+ "learning_rate": 4.707998122270492e-05,
+ "loss": 0.422,
+ "step": 121500
+ },
+ {
+ "epoch": 0.6271250277317731,
+ "grad_norm": 21278.99609375,
+ "learning_rate": 4.7043514206276276e-05,
+ "loss": 0.4202,
+ "step": 121550
+ },
+ {
+ "epoch": 0.627382997714386,
+ "grad_norm": 24062.6015625,
+ "learning_rate": 4.70070487679625e-05,
+ "loss": 0.4174,
+ "step": 121600
+ },
+ {
+ "epoch": 0.6276409676969987,
+ "grad_norm": 21124.400390625,
+ "learning_rate": 4.697058492722815e-05,
+ "loss": 0.4156,
+ "step": 121650
+ },
+ {
+ "epoch": 0.6278989376796116,
+ "grad_norm": 22513.48046875,
+ "learning_rate": 4.6934122703536894e-05,
+ "loss": 0.4198,
+ "step": 121700
+ },
+ {
+ "epoch": 0.6281569076622244,
+ "grad_norm": 24250.720703125,
+ "learning_rate": 4.689766211635159e-05,
+ "loss": 0.4197,
+ "step": 121750
+ },
+ {
+ "epoch": 0.6284148776448373,
+ "grad_norm": 23831.220703125,
+ "learning_rate": 4.686120318513415e-05,
+ "loss": 0.415,
+ "step": 121800
+ },
+ {
+ "epoch": 0.62867284762745,
+ "grad_norm": 24005.458984375,
+ "learning_rate": 4.682474592934569e-05,
+ "loss": 0.4154,
+ "step": 121850
+ },
+ {
+ "epoch": 0.6289308176100629,
+ "grad_norm": 21365.09375,
+ "learning_rate": 4.6788290368446355e-05,
+ "loss": 0.4164,
+ "step": 121900
+ },
+ {
+ "epoch": 0.6291887875926757,
+ "grad_norm": 23601.689453125,
+ "learning_rate": 4.675183652189545e-05,
+ "loss": 0.418,
+ "step": 121950
+ },
+ {
+ "epoch": 0.6294467575752886,
+ "grad_norm": 21023.33203125,
+ "learning_rate": 4.671538440915129e-05,
+ "loss": 0.4181,
+ "step": 122000
+ },
+ {
+ "epoch": 0.6297047275579014,
+ "grad_norm": 22292.671875,
+ "learning_rate": 4.667893404967133e-05,
+ "loss": 0.4203,
+ "step": 122050
+ },
+ {
+ "epoch": 0.6299626975405141,
+ "grad_norm": 21975.3671875,
+ "learning_rate": 4.664248546291207e-05,
+ "loss": 0.4162,
+ "step": 122100
+ },
+ {
+ "epoch": 0.630220667523127,
+ "grad_norm": 22591.34765625,
+ "learning_rate": 4.660603866832906e-05,
+ "loss": 0.4146,
+ "step": 122150
+ },
+ {
+ "epoch": 0.6304786375057398,
+ "grad_norm": 23449.529296875,
+ "learning_rate": 4.6569593685376895e-05,
+ "loss": 0.4205,
+ "step": 122200
+ },
+ {
+ "epoch": 0.6307366074883527,
+ "grad_norm": 21614.046875,
+ "learning_rate": 4.653315053350918e-05,
+ "loss": 0.4173,
+ "step": 122250
+ },
+ {
+ "epoch": 0.6309945774709654,
+ "grad_norm": 26004.5859375,
+ "learning_rate": 4.649670923217859e-05,
+ "loss": 0.4137,
+ "step": 122300
+ },
+ {
+ "epoch": 0.6312525474535783,
+ "grad_norm": 23640.357421875,
+ "learning_rate": 4.646026980083676e-05,
+ "loss": 0.4165,
+ "step": 122350
+ },
+ {
+ "epoch": 0.6315105174361911,
+ "grad_norm": 23575.3984375,
+ "learning_rate": 4.6423832258934396e-05,
+ "loss": 0.4179,
+ "step": 122400
+ },
+ {
+ "epoch": 0.631768487418804,
+ "grad_norm": 26795.05078125,
+ "learning_rate": 4.63873966259211e-05,
+ "loss": 0.4171,
+ "step": 122450
+ },
+ {
+ "epoch": 0.6320264574014167,
+ "grad_norm": 22246.931640625,
+ "learning_rate": 4.6350962921245515e-05,
+ "loss": 0.4188,
+ "step": 122500
+ },
+ {
+ "epoch": 0.6322844273840296,
+ "grad_norm": 22268.3359375,
+ "learning_rate": 4.63145311643553e-05,
+ "loss": 0.4141,
+ "step": 122550
+ },
+ {
+ "epoch": 0.6325423973666424,
+ "grad_norm": 23749.38671875,
+ "learning_rate": 4.627810137469696e-05,
+ "loss": 0.4132,
+ "step": 122600
+ },
+ {
+ "epoch": 0.6328003673492553,
+ "grad_norm": 22449.15625,
+ "learning_rate": 4.624167357171606e-05,
+ "loss": 0.4177,
+ "step": 122650
+ },
+ {
+ "epoch": 0.6330583373318681,
+ "grad_norm": 22132.927734375,
+ "learning_rate": 4.6205247774857e-05,
+ "loss": 0.4211,
+ "step": 122700
+ },
+ {
+ "epoch": 0.6333163073144809,
+ "grad_norm": 20199.654296875,
+ "learning_rate": 4.616882400356323e-05,
+ "loss": 0.4127,
+ "step": 122750
+ },
+ {
+ "epoch": 0.6335742772970937,
+ "grad_norm": 23172.29296875,
+ "learning_rate": 4.613240227727699e-05,
+ "loss": 0.4173,
+ "step": 122800
+ },
+ {
+ "epoch": 0.6338322472797066,
+ "grad_norm": 23373.6640625,
+ "learning_rate": 4.609598261543954e-05,
+ "loss": 0.4139,
+ "step": 122850
+ },
+ {
+ "epoch": 0.6340902172623194,
+ "grad_norm": 22187.794921875,
+ "learning_rate": 4.6059565037490965e-05,
+ "loss": 0.4233,
+ "step": 122900
+ },
+ {
+ "epoch": 0.6343481872449321,
+ "grad_norm": 21762.28515625,
+ "learning_rate": 4.602314956287027e-05,
+ "loss": 0.4195,
+ "step": 122950
+ },
+ {
+ "epoch": 0.634606157227545,
+ "grad_norm": 24228.3125,
+ "learning_rate": 4.598673621101535e-05,
+ "loss": 0.4218,
+ "step": 123000
+ },
+ {
+ "epoch": 0.6348641272101578,
+ "grad_norm": 20360.208984375,
+ "learning_rate": 4.595032500136291e-05,
+ "loss": 0.4266,
+ "step": 123050
+ },
+ {
+ "epoch": 0.6351220971927707,
+ "grad_norm": 22763.712890625,
+ "learning_rate": 4.5913915953348574e-05,
+ "loss": 0.4153,
+ "step": 123100
+ },
+ {
+ "epoch": 0.6353800671753834,
+ "grad_norm": 25601.05078125,
+ "learning_rate": 4.5877509086406766e-05,
+ "loss": 0.4201,
+ "step": 123150
+ },
+ {
+ "epoch": 0.6356380371579963,
+ "grad_norm": 22695.91015625,
+ "learning_rate": 4.584110441997081e-05,
+ "loss": 0.4174,
+ "step": 123200
+ },
+ {
+ "epoch": 0.6358960071406091,
+ "grad_norm": 24915.857421875,
+ "learning_rate": 4.5804701973472755e-05,
+ "loss": 0.416,
+ "step": 123250
+ },
+ {
+ "epoch": 0.636153977123222,
+ "grad_norm": 24066.427734375,
+ "learning_rate": 4.576830176634356e-05,
+ "loss": 0.415,
+ "step": 123300
+ },
+ {
+ "epoch": 0.6364119471058348,
+ "grad_norm": 25726.71484375,
+ "learning_rate": 4.573190381801293e-05,
+ "loss": 0.4204,
+ "step": 123350
+ },
+ {
+ "epoch": 0.6366699170884476,
+ "grad_norm": 24271.998046875,
+ "learning_rate": 4.56955081479094e-05,
+ "loss": 0.4166,
+ "step": 123400
+ },
+ {
+ "epoch": 0.6369278870710604,
+ "grad_norm": 20897.818359375,
+ "learning_rate": 4.5659114775460286e-05,
+ "loss": 0.4156,
+ "step": 123450
+ },
+ {
+ "epoch": 0.6371858570536733,
+ "grad_norm": 24409.841796875,
+ "learning_rate": 4.562272372009163e-05,
+ "loss": 0.4208,
+ "step": 123500
+ },
+ {
+ "epoch": 0.6374438270362861,
+ "grad_norm": 24757.927734375,
+ "learning_rate": 4.5586335001228296e-05,
+ "loss": 0.4167,
+ "step": 123550
+ },
+ {
+ "epoch": 0.6377017970188988,
+ "grad_norm": 22433.091796875,
+ "learning_rate": 4.554994863829387e-05,
+ "loss": 0.4206,
+ "step": 123600
+ },
+ {
+ "epoch": 0.6379597670015117,
+ "grad_norm": 22757.798828125,
+ "learning_rate": 4.5513564650710706e-05,
+ "loss": 0.4113,
+ "step": 123650
+ },
+ {
+ "epoch": 0.6382177369841245,
+ "grad_norm": 22652.9140625,
+ "learning_rate": 4.547718305789984e-05,
+ "loss": 0.4224,
+ "step": 123700
+ },
+ {
+ "epoch": 0.6384757069667374,
+ "grad_norm": 25416.0390625,
+ "learning_rate": 4.5440803879281086e-05,
+ "loss": 0.4129,
+ "step": 123750
+ },
+ {
+ "epoch": 0.6387336769493501,
+ "grad_norm": 22621.40625,
+ "learning_rate": 4.5404427134272926e-05,
+ "loss": 0.4204,
+ "step": 123800
+ },
+ {
+ "epoch": 0.638991646931963,
+ "grad_norm": 24213.93359375,
+ "learning_rate": 4.536805284229258e-05,
+ "loss": 0.4109,
+ "step": 123850
+ },
+ {
+ "epoch": 0.6392496169145758,
+ "grad_norm": 20231.091796875,
+ "learning_rate": 4.5331681022755946e-05,
+ "loss": 0.4221,
+ "step": 123900
+ },
+ {
+ "epoch": 0.6395075868971887,
+ "grad_norm": 22513.21875,
+ "learning_rate": 4.529531169507757e-05,
+ "loss": 0.4189,
+ "step": 123950
+ },
+ {
+ "epoch": 0.6397655568798014,
+ "grad_norm": 19454.783203125,
+ "learning_rate": 4.5258944878670714e-05,
+ "loss": 0.4138,
+ "step": 124000
+ },
+ {
+ "epoch": 0.6400235268624143,
+ "grad_norm": 23547.423828125,
+ "learning_rate": 4.522258059294727e-05,
+ "loss": 0.4206,
+ "step": 124050
+ },
+ {
+ "epoch": 0.6402814968450271,
+ "grad_norm": 23985.0703125,
+ "learning_rate": 4.5186218857317825e-05,
+ "loss": 0.4186,
+ "step": 124100
+ },
+ {
+ "epoch": 0.64053946682764,
+ "grad_norm": 22254.078125,
+ "learning_rate": 4.5149859691191517e-05,
+ "loss": 0.4076,
+ "step": 124150
+ },
+ {
+ "epoch": 0.6407974368102528,
+ "grad_norm": 24060.70703125,
+ "learning_rate": 4.5113503113976194e-05,
+ "loss": 0.4207,
+ "step": 124200
+ },
+ {
+ "epoch": 0.6410554067928655,
+ "grad_norm": 21521.923828125,
+ "learning_rate": 4.5077149145078275e-05,
+ "loss": 0.4134,
+ "step": 124250
+ },
+ {
+ "epoch": 0.6413133767754784,
+ "grad_norm": 22107.48828125,
+ "learning_rate": 4.504079780390282e-05,
+ "loss": 0.4095,
+ "step": 124300
+ },
+ {
+ "epoch": 0.6415713467580912,
+ "grad_norm": 22610.880859375,
+ "learning_rate": 4.5004449109853485e-05,
+ "loss": 0.4216,
+ "step": 124350
+ },
+ {
+ "epoch": 0.6418293167407041,
+ "grad_norm": 22752.83984375,
+ "learning_rate": 4.496810308233247e-05,
+ "loss": 0.4225,
+ "step": 124400
+ },
+ {
+ "epoch": 0.6420872867233168,
+ "grad_norm": 22029.88671875,
+ "learning_rate": 4.4931759740740596e-05,
+ "loss": 0.4138,
+ "step": 124450
+ },
+ {
+ "epoch": 0.6423452567059297,
+ "grad_norm": 24989.2421875,
+ "learning_rate": 4.489541910447722e-05,
+ "loss": 0.4166,
+ "step": 124500
+ },
+ {
+ "epoch": 0.6426032266885425,
+ "grad_norm": 25843.16796875,
+ "learning_rate": 4.485908119294031e-05,
+ "loss": 0.4132,
+ "step": 124550
+ },
+ {
+ "epoch": 0.6428611966711554,
+ "grad_norm": 23847.01171875,
+ "learning_rate": 4.4822746025526286e-05,
+ "loss": 0.4256,
+ "step": 124600
+ },
+ {
+ "epoch": 0.6431191666537681,
+ "grad_norm": 21634.71484375,
+ "learning_rate": 4.478641362163019e-05,
+ "loss": 0.4182,
+ "step": 124650
+ },
+ {
+ "epoch": 0.643377136636381,
+ "grad_norm": 22252.021484375,
+ "learning_rate": 4.475008400064554e-05,
+ "loss": 0.419,
+ "step": 124700
+ },
+ {
+ "epoch": 0.6436351066189938,
+ "grad_norm": 24151.951171875,
+ "learning_rate": 4.471375718196439e-05,
+ "loss": 0.4201,
+ "step": 124750
+ },
+ {
+ "epoch": 0.6438930766016067,
+ "grad_norm": 23570.310546875,
+ "learning_rate": 4.4677433184977315e-05,
+ "loss": 0.4131,
+ "step": 124800
+ },
+ {
+ "epoch": 0.6441510465842195,
+ "grad_norm": 23886.896484375,
+ "learning_rate": 4.464111202907332e-05,
+ "loss": 0.4172,
+ "step": 124850
+ },
+ {
+ "epoch": 0.6444090165668322,
+ "grad_norm": 23476.888671875,
+ "learning_rate": 4.4604793733639973e-05,
+ "loss": 0.419,
+ "step": 124900
+ },
+ {
+ "epoch": 0.6446669865494451,
+ "grad_norm": 22735.759765625,
+ "learning_rate": 4.456847831806324e-05,
+ "loss": 0.4214,
+ "step": 124950
+ },
+ {
+ "epoch": 0.644924956532058,
+ "grad_norm": 25508.525390625,
+ "learning_rate": 4.4532165801727626e-05,
+ "loss": 0.4184,
+ "step": 125000
+ },
+ {
+ "epoch": 0.644924956532058,
+ "eval_loss": 0.40382638573646545,
+ "eval_runtime": 3215.6548,
+ "eval_samples_per_second": 964.382,
+ "eval_steps_per_second": 1.884,
+ "step": 125000
+ },
+ {
+ "epoch": 0.6451829265146708,
+ "grad_norm": 23686.8671875,
+ "learning_rate": 4.449585620401601e-05,
+ "loss": 0.4115,
+ "step": 125050
+ },
+ {
+ "epoch": 0.6454408964972835,
+ "grad_norm": 22472.7421875,
+ "learning_rate": 4.445954954430976e-05,
+ "loss": 0.4187,
+ "step": 125100
+ },
+ {
+ "epoch": 0.6456988664798964,
+ "grad_norm": 25044.5859375,
+ "learning_rate": 4.442324584198871e-05,
+ "loss": 0.4188,
+ "step": 125150
+ },
+ {
+ "epoch": 0.6459568364625092,
+ "grad_norm": 23489.119140625,
+ "learning_rate": 4.4386945116431025e-05,
+ "loss": 0.4212,
+ "step": 125200
+ },
+ {
+ "epoch": 0.6462148064451221,
+ "grad_norm": 23150.12109375,
+ "learning_rate": 4.435064738701335e-05,
+ "loss": 0.4155,
+ "step": 125250
+ },
+ {
+ "epoch": 0.6464727764277348,
+ "grad_norm": 22082.09765625,
+ "learning_rate": 4.4314352673110696e-05,
+ "loss": 0.4208,
+ "step": 125300
+ },
+ {
+ "epoch": 0.6467307464103477,
+ "grad_norm": 23107.71484375,
+ "learning_rate": 4.427806099409652e-05,
+ "loss": 0.4172,
+ "step": 125350
+ },
+ {
+ "epoch": 0.6469887163929605,
+ "grad_norm": 23660.607421875,
+ "learning_rate": 4.4241772369342554e-05,
+ "loss": 0.4156,
+ "step": 125400
+ },
+ {
+ "epoch": 0.6472466863755734,
+ "grad_norm": 22054.47265625,
+ "learning_rate": 4.420548681821901e-05,
+ "loss": 0.4174,
+ "step": 125450
+ },
+ {
+ "epoch": 0.6475046563581862,
+ "grad_norm": 22386.654296875,
+ "learning_rate": 4.416920436009439e-05,
+ "loss": 0.4164,
+ "step": 125500
+ },
+ {
+ "epoch": 0.647762626340799,
+ "grad_norm": 22394.78125,
+ "learning_rate": 4.413292501433557e-05,
+ "loss": 0.4128,
+ "step": 125550
+ },
+ {
+ "epoch": 0.6480205963234118,
+ "grad_norm": 21871.1953125,
+ "learning_rate": 4.4096648800307796e-05,
+ "loss": 0.4174,
+ "step": 125600
+ },
+ {
+ "epoch": 0.6482785663060247,
+ "grad_norm": 21630.826171875,
+ "learning_rate": 4.406037573737456e-05,
+ "loss": 0.4146,
+ "step": 125650
+ },
+ {
+ "epoch": 0.6485365362886375,
+ "grad_norm": 20917.244140625,
+ "learning_rate": 4.4024105844897744e-05,
+ "loss": 0.4172,
+ "step": 125700
+ },
+ {
+ "epoch": 0.6487945062712502,
+ "grad_norm": 21545.53515625,
+ "learning_rate": 4.3987839142237505e-05,
+ "loss": 0.4189,
+ "step": 125750
+ },
+ {
+ "epoch": 0.6490524762538631,
+ "grad_norm": 27708.19140625,
+ "learning_rate": 4.395157564875234e-05,
+ "loss": 0.4127,
+ "step": 125800
+ },
+ {
+ "epoch": 0.6493104462364759,
+ "grad_norm": 23791.052734375,
+ "learning_rate": 4.391531538379895e-05,
+ "loss": 0.4146,
+ "step": 125850
+ },
+ {
+ "epoch": 0.6495684162190888,
+ "grad_norm": 23441.0078125,
+ "learning_rate": 4.387905836673239e-05,
+ "loss": 0.4191,
+ "step": 125900
+ },
+ {
+ "epoch": 0.6498263862017015,
+ "grad_norm": 21998.982421875,
+ "learning_rate": 4.3842804616905944e-05,
+ "loss": 0.4165,
+ "step": 125950
+ },
+ {
+ "epoch": 0.6500843561843144,
+ "grad_norm": 26170.572265625,
+ "learning_rate": 4.380655415367116e-05,
+ "loss": 0.4106,
+ "step": 126000
+ },
+ {
+ "epoch": 0.6503423261669272,
+ "grad_norm": 23915.345703125,
+ "learning_rate": 4.3770306996377866e-05,
+ "loss": 0.417,
+ "step": 126050
+ },
+ {
+ "epoch": 0.6506002961495401,
+ "grad_norm": 22807.23828125,
+ "learning_rate": 4.373406316437404e-05,
+ "loss": 0.4138,
+ "step": 126100
+ },
+ {
+ "epoch": 0.6508582661321528,
+ "grad_norm": 22825.060546875,
+ "learning_rate": 4.369782267700598e-05,
+ "loss": 0.4159,
+ "step": 126150
+ },
+ {
+ "epoch": 0.6511162361147657,
+ "grad_norm": 21670.83984375,
+ "learning_rate": 4.366158555361812e-05,
+ "loss": 0.4131,
+ "step": 126200
+ },
+ {
+ "epoch": 0.6513742060973785,
+ "grad_norm": 24840.630859375,
+ "learning_rate": 4.362535181355319e-05,
+ "loss": 0.4072,
+ "step": 126250
+ },
+ {
+ "epoch": 0.6516321760799914,
+ "grad_norm": 24121.158203125,
+ "learning_rate": 4.358912147615199e-05,
+ "loss": 0.4085,
+ "step": 126300
+ },
+ {
+ "epoch": 0.6518901460626042,
+ "grad_norm": 21738.236328125,
+ "learning_rate": 4.355289456075363e-05,
+ "loss": 0.4154,
+ "step": 126350
+ },
+ {
+ "epoch": 0.6521481160452169,
+ "grad_norm": 24880.833984375,
+ "learning_rate": 4.3516671086695296e-05,
+ "loss": 0.4154,
+ "step": 126400
+ },
+ {
+ "epoch": 0.6524060860278298,
+ "grad_norm": 21572.140625,
+ "learning_rate": 4.348045107331239e-05,
+ "loss": 0.4185,
+ "step": 126450
+ },
+ {
+ "epoch": 0.6526640560104426,
+ "grad_norm": 24076.17578125,
+ "learning_rate": 4.344423453993849e-05,
+ "loss": 0.4132,
+ "step": 126500
+ },
+ {
+ "epoch": 0.6529220259930555,
+ "grad_norm": 23531.365234375,
+ "learning_rate": 4.340802150590522e-05,
+ "loss": 0.4179,
+ "step": 126550
+ },
+ {
+ "epoch": 0.6531799959756682,
+ "grad_norm": 24287.568359375,
+ "learning_rate": 4.337181199054243e-05,
+ "loss": 0.4136,
+ "step": 126600
+ },
+ {
+ "epoch": 0.6534379659582811,
+ "grad_norm": 23352.52734375,
+ "learning_rate": 4.3335606013178046e-05,
+ "loss": 0.4177,
+ "step": 126650
+ },
+ {
+ "epoch": 0.6536959359408939,
+ "grad_norm": 22291.494140625,
+ "learning_rate": 4.3299403593138144e-05,
+ "loss": 0.4155,
+ "step": 126700
+ },
+ {
+ "epoch": 0.6539539059235068,
+ "grad_norm": 20745.798828125,
+ "learning_rate": 4.3263204749746836e-05,
+ "loss": 0.4139,
+ "step": 126750
+ },
+ {
+ "epoch": 0.6542118759061195,
+ "grad_norm": 24670.357421875,
+ "learning_rate": 4.322700950232639e-05,
+ "loss": 0.423,
+ "step": 126800
+ },
+ {
+ "epoch": 0.6544698458887324,
+ "grad_norm": 23067.81640625,
+ "learning_rate": 4.31908178701971e-05,
+ "loss": 0.4174,
+ "step": 126850
+ },
+ {
+ "epoch": 0.6547278158713452,
+ "grad_norm": 25275.47265625,
+ "learning_rate": 4.315462987267739e-05,
+ "loss": 0.4181,
+ "step": 126900
+ },
+ {
+ "epoch": 0.6549857858539581,
+ "grad_norm": 21032.4375,
+ "learning_rate": 4.311844552908372e-05,
+ "loss": 0.4111,
+ "step": 126950
+ },
+ {
+ "epoch": 0.6552437558365709,
+ "grad_norm": 21629.0625,
+ "learning_rate": 4.308226485873056e-05,
+ "loss": 0.4129,
+ "step": 127000
+ },
+ {
+ "epoch": 0.6555017258191836,
+ "grad_norm": 24375.935546875,
+ "learning_rate": 4.3046087880930466e-05,
+ "loss": 0.4129,
+ "step": 127050
+ },
+ {
+ "epoch": 0.6557596958017965,
+ "grad_norm": 21224.63671875,
+ "learning_rate": 4.3009914614994e-05,
+ "loss": 0.4156,
+ "step": 127100
+ },
+ {
+ "epoch": 0.6560176657844093,
+ "grad_norm": 24836.560546875,
+ "learning_rate": 4.297374508022977e-05,
+ "loss": 0.4133,
+ "step": 127150
+ },
+ {
+ "epoch": 0.6562756357670222,
+ "grad_norm": 22769.599609375,
+ "learning_rate": 4.293757929594435e-05,
+ "loss": 0.4151,
+ "step": 127200
+ },
+ {
+ "epoch": 0.6565336057496349,
+ "grad_norm": 22936.603515625,
+ "learning_rate": 4.2901417281442345e-05,
+ "loss": 0.4173,
+ "step": 127250
+ },
+ {
+ "epoch": 0.6567915757322478,
+ "grad_norm": 21296.39453125,
+ "learning_rate": 4.286525905602634e-05,
+ "loss": 0.4121,
+ "step": 127300
+ },
+ {
+ "epoch": 0.6570495457148606,
+ "grad_norm": 24282.591796875,
+ "learning_rate": 4.282910463899689e-05,
+ "loss": 0.4086,
+ "step": 127350
+ },
+ {
+ "epoch": 0.6573075156974735,
+ "grad_norm": 22443.6015625,
+ "learning_rate": 4.2792954049652545e-05,
+ "loss": 0.4183,
+ "step": 127400
+ },
+ {
+ "epoch": 0.6575654856800862,
+ "grad_norm": 21437.98046875,
+ "learning_rate": 4.275680730728976e-05,
+ "loss": 0.4172,
+ "step": 127450
+ },
+ {
+ "epoch": 0.6578234556626991,
+ "grad_norm": 24970.3125,
+ "learning_rate": 4.2720664431202987e-05,
+ "loss": 0.4187,
+ "step": 127500
+ },
+ {
+ "epoch": 0.6580814256453119,
+ "grad_norm": 21128.349609375,
+ "learning_rate": 4.268452544068457e-05,
+ "loss": 0.4142,
+ "step": 127550
+ },
+ {
+ "epoch": 0.6583393956279248,
+ "grad_norm": 26429.14453125,
+ "learning_rate": 4.2648390355024836e-05,
+ "loss": 0.4115,
+ "step": 127600
+ },
+ {
+ "epoch": 0.6585973656105376,
+ "grad_norm": 22542.380859375,
+ "learning_rate": 4.261225919351195e-05,
+ "loss": 0.4144,
+ "step": 127650
+ },
+ {
+ "epoch": 0.6588553355931503,
+ "grad_norm": 23179.853515625,
+ "learning_rate": 4.257613197543207e-05,
+ "loss": 0.4164,
+ "step": 127700
+ },
+ {
+ "epoch": 0.6591133055757632,
+ "grad_norm": 24641.048828125,
+ "learning_rate": 4.254000872006918e-05,
+ "loss": 0.4175,
+ "step": 127750
+ },
+ {
+ "epoch": 0.659371275558376,
+ "grad_norm": 23836.771484375,
+ "learning_rate": 4.250388944670517e-05,
+ "loss": 0.4201,
+ "step": 127800
+ },
+ {
+ "epoch": 0.6596292455409889,
+ "grad_norm": 23714.7578125,
+ "learning_rate": 4.2467774174619836e-05,
+ "loss": 0.4102,
+ "step": 127850
+ },
+ {
+ "epoch": 0.6598872155236016,
+ "grad_norm": 23630.2890625,
+ "learning_rate": 4.2431662923090785e-05,
+ "loss": 0.411,
+ "step": 127900
+ },
+ {
+ "epoch": 0.6601451855062145,
+ "grad_norm": 23018.384765625,
+ "learning_rate": 4.239555571139353e-05,
+ "loss": 0.4113,
+ "step": 127950
+ },
+ {
+ "epoch": 0.6604031554888273,
+ "grad_norm": 23594.041015625,
+ "learning_rate": 4.235945255880137e-05,
+ "loss": 0.4153,
+ "step": 128000
+ },
+ {
+ "epoch": 0.6606611254714402,
+ "grad_norm": 24231.07421875,
+ "learning_rate": 4.232335348458549e-05,
+ "loss": 0.4159,
+ "step": 128050
+ },
+ {
+ "epoch": 0.6609190954540529,
+ "grad_norm": 22362.98828125,
+ "learning_rate": 4.228725850801486e-05,
+ "loss": 0.4218,
+ "step": 128100
+ },
+ {
+ "epoch": 0.6611770654366658,
+ "grad_norm": 23008.44140625,
+ "learning_rate": 4.225116764835631e-05,
+ "loss": 0.416,
+ "step": 128150
+ },
+ {
+ "epoch": 0.6614350354192786,
+ "grad_norm": 23027.1875,
+ "learning_rate": 4.221508092487441e-05,
+ "loss": 0.4163,
+ "step": 128200
+ },
+ {
+ "epoch": 0.6616930054018915,
+ "grad_norm": 25121.61328125,
+ "learning_rate": 4.2178998356831553e-05,
+ "loss": 0.4167,
+ "step": 128250
+ },
+ {
+ "epoch": 0.6619509753845043,
+ "grad_norm": 24767.4140625,
+ "learning_rate": 4.214291996348794e-05,
+ "loss": 0.4176,
+ "step": 128300
+ },
+ {
+ "epoch": 0.662208945367117,
+ "grad_norm": 24596.533203125,
+ "learning_rate": 4.210684576410151e-05,
+ "loss": 0.4183,
+ "step": 128350
+ },
+ {
+ "epoch": 0.6624669153497299,
+ "grad_norm": 21095.8671875,
+ "learning_rate": 4.2070775777927976e-05,
+ "loss": 0.4151,
+ "step": 128400
+ },
+ {
+ "epoch": 0.6627248853323428,
+ "grad_norm": 25389.1640625,
+ "learning_rate": 4.203471002422077e-05,
+ "loss": 0.4226,
+ "step": 128450
+ },
+ {
+ "epoch": 0.6629828553149556,
+ "grad_norm": 24613.94921875,
+ "learning_rate": 4.199864852223113e-05,
+ "loss": 0.4093,
+ "step": 128500
+ },
+ {
+ "epoch": 0.6632408252975683,
+ "grad_norm": 23665.59765625,
+ "learning_rate": 4.196259129120796e-05,
+ "loss": 0.4135,
+ "step": 128550
+ },
+ {
+ "epoch": 0.6634987952801812,
+ "grad_norm": 22946.5234375,
+ "learning_rate": 4.192653835039795e-05,
+ "loss": 0.4151,
+ "step": 128600
+ },
+ {
+ "epoch": 0.663756765262794,
+ "grad_norm": 22438.23046875,
+ "learning_rate": 4.189048971904541e-05,
+ "loss": 0.4064,
+ "step": 128650
+ },
+ {
+ "epoch": 0.6640147352454069,
+ "grad_norm": 22760.623046875,
+ "learning_rate": 4.185444541639243e-05,
+ "loss": 0.4084,
+ "step": 128700
+ },
+ {
+ "epoch": 0.6642727052280196,
+ "grad_norm": 25223.484375,
+ "learning_rate": 4.1818405461678763e-05,
+ "loss": 0.4151,
+ "step": 128750
+ },
+ {
+ "epoch": 0.6645306752106325,
+ "grad_norm": 31547.962890625,
+ "learning_rate": 4.178236987414182e-05,
+ "loss": 0.4115,
+ "step": 128800
+ },
+ {
+ "epoch": 0.6647886451932453,
+ "grad_norm": 19114.953125,
+ "learning_rate": 4.174633867301674e-05,
+ "loss": 0.4109,
+ "step": 128850
+ },
+ {
+ "epoch": 0.6650466151758582,
+ "grad_norm": 22819.888671875,
+ "learning_rate": 4.1710311877536226e-05,
+ "loss": 0.4123,
+ "step": 128900
+ },
+ {
+ "epoch": 0.6653045851584709,
+ "grad_norm": 22868.62890625,
+ "learning_rate": 4.167428950693073e-05,
+ "loss": 0.413,
+ "step": 128950
+ },
+ {
+ "epoch": 0.6655625551410838,
+ "grad_norm": 23062.359375,
+ "learning_rate": 4.163827158042826e-05,
+ "loss": 0.4152,
+ "step": 129000
+ },
+ {
+ "epoch": 0.6658205251236966,
+ "grad_norm": 25990.505859375,
+ "learning_rate": 4.160225811725453e-05,
+ "loss": 0.4176,
+ "step": 129050
+ },
+ {
+ "epoch": 0.6660784951063095,
+ "grad_norm": 21594.1953125,
+ "learning_rate": 4.156624913663279e-05,
+ "loss": 0.4136,
+ "step": 129100
+ },
+ {
+ "epoch": 0.6663364650889223,
+ "grad_norm": 21145.869140625,
+ "learning_rate": 4.153024465778393e-05,
+ "loss": 0.4216,
+ "step": 129150
+ },
+ {
+ "epoch": 0.666594435071535,
+ "grad_norm": 22634.7734375,
+ "learning_rate": 4.149424469992649e-05,
+ "loss": 0.4114,
+ "step": 129200
+ },
+ {
+ "epoch": 0.6668524050541479,
+ "grad_norm": 23526.46875,
+ "learning_rate": 4.145824928227652e-05,
+ "loss": 0.4217,
+ "step": 129250
+ },
+ {
+ "epoch": 0.6671103750367607,
+ "grad_norm": 22295.880859375,
+ "learning_rate": 4.142225842404769e-05,
+ "loss": 0.4169,
+ "step": 129300
+ },
+ {
+ "epoch": 0.6673683450193736,
+ "grad_norm": 22282.421875,
+ "learning_rate": 4.13862721444512e-05,
+ "loss": 0.4195,
+ "step": 129350
+ },
+ {
+ "epoch": 0.6676263150019863,
+ "grad_norm": 21856.337890625,
+ "learning_rate": 4.135029046269585e-05,
+ "loss": 0.4229,
+ "step": 129400
+ },
+ {
+ "epoch": 0.6678842849845992,
+ "grad_norm": 20999.04296875,
+ "learning_rate": 4.131431339798796e-05,
+ "loss": 0.4168,
+ "step": 129450
+ },
+ {
+ "epoch": 0.668142254967212,
+ "grad_norm": 24684.484375,
+ "learning_rate": 4.12783409695314e-05,
+ "loss": 0.4117,
+ "step": 129500
+ },
+ {
+ "epoch": 0.6684002249498249,
+ "grad_norm": 24120.349609375,
+ "learning_rate": 4.124237319652753e-05,
+ "loss": 0.4186,
+ "step": 129550
+ },
+ {
+ "epoch": 0.6686581949324376,
+ "grad_norm": 23283.736328125,
+ "learning_rate": 4.1206410098175265e-05,
+ "loss": 0.4176,
+ "step": 129600
+ },
+ {
+ "epoch": 0.6689161649150505,
+ "grad_norm": 21902.6875,
+ "learning_rate": 4.117045169367102e-05,
+ "loss": 0.4153,
+ "step": 129650
+ },
+ {
+ "epoch": 0.6691741348976633,
+ "grad_norm": 22762.6015625,
+ "learning_rate": 4.1134498002208674e-05,
+ "loss": 0.414,
+ "step": 129700
+ },
+ {
+ "epoch": 0.6694321048802762,
+ "grad_norm": 20947.083984375,
+ "learning_rate": 4.109854904297965e-05,
+ "loss": 0.4113,
+ "step": 129750
+ },
+ {
+ "epoch": 0.669690074862889,
+ "grad_norm": 24687.189453125,
+ "learning_rate": 4.106260483517276e-05,
+ "loss": 0.4207,
+ "step": 129800
+ },
+ {
+ "epoch": 0.6699480448455017,
+ "grad_norm": 24164.724609375,
+ "learning_rate": 4.102666539797435e-05,
+ "loss": 0.4116,
+ "step": 129850
+ },
+ {
+ "epoch": 0.6702060148281146,
+ "grad_norm": 23408.68359375,
+ "learning_rate": 4.099073075056818e-05,
+ "loss": 0.4181,
+ "step": 129900
+ },
+ {
+ "epoch": 0.6704639848107274,
+ "grad_norm": 22822.3515625,
+ "learning_rate": 4.0954800912135516e-05,
+ "loss": 0.4176,
+ "step": 129950
+ },
+ {
+ "epoch": 0.6707219547933403,
+ "grad_norm": 21576.173828125,
+ "learning_rate": 4.091887590185494e-05,
+ "loss": 0.4165,
+ "step": 130000
+ },
+ {
+ "epoch": 0.6707219547933403,
+ "eval_loss": 0.40186887979507446,
+ "eval_runtime": 3150.7117,
+ "eval_samples_per_second": 984.26,
+ "eval_steps_per_second": 1.922,
+ "step": 130000
+ },
+ {
+ "epoch": 0.670979924775953,
+ "grad_norm": 21987.3671875,
+ "learning_rate": 4.0882955738902576e-05,
+ "loss": 0.4176,
+ "step": 130050
+ },
+ {
+ "epoch": 0.6712378947585659,
+ "grad_norm": 23900.74609375,
+ "learning_rate": 4.0847040442451895e-05,
+ "loss": 0.4183,
+ "step": 130100
+ },
+ {
+ "epoch": 0.6714958647411787,
+ "grad_norm": 22624.236328125,
+ "learning_rate": 4.081113003167378e-05,
+ "loss": 0.4146,
+ "step": 130150
+ },
+ {
+ "epoch": 0.6717538347237916,
+ "grad_norm": 22636.490234375,
+ "learning_rate": 4.0775224525736546e-05,
+ "loss": 0.4107,
+ "step": 130200
+ },
+ {
+ "epoch": 0.6720118047064043,
+ "grad_norm": 22667.66796875,
+ "learning_rate": 4.07393239438058e-05,
+ "loss": 0.4151,
+ "step": 130250
+ },
+ {
+ "epoch": 0.6722697746890172,
+ "grad_norm": 20381.720703125,
+ "learning_rate": 4.070342830504465e-05,
+ "loss": 0.4167,
+ "step": 130300
+ },
+ {
+ "epoch": 0.67252774467163,
+ "grad_norm": 22913.248046875,
+ "learning_rate": 4.0667537628613424e-05,
+ "loss": 0.4116,
+ "step": 130350
+ },
+ {
+ "epoch": 0.6727857146542429,
+ "grad_norm": 23168.865234375,
+ "learning_rate": 4.063165193366992e-05,
+ "loss": 0.413,
+ "step": 130400
+ },
+ {
+ "epoch": 0.6730436846368557,
+ "grad_norm": 21597.861328125,
+ "learning_rate": 4.059577123936918e-05,
+ "loss": 0.4179,
+ "step": 130450
+ },
+ {
+ "epoch": 0.6733016546194684,
+ "grad_norm": 20305.806640625,
+ "learning_rate": 4.055989556486365e-05,
+ "loss": 0.4199,
+ "step": 130500
+ },
+ {
+ "epoch": 0.6735596246020813,
+ "grad_norm": 23520.173828125,
+ "learning_rate": 4.052402492930311e-05,
+ "loss": 0.4154,
+ "step": 130550
+ },
+ {
+ "epoch": 0.6738175945846941,
+ "grad_norm": 23356.85546875,
+ "learning_rate": 4.048815935183453e-05,
+ "loss": 0.4154,
+ "step": 130600
+ },
+ {
+ "epoch": 0.674075564567307,
+ "grad_norm": 22958.611328125,
+ "learning_rate": 4.0452298851602324e-05,
+ "loss": 0.4149,
+ "step": 130650
+ },
+ {
+ "epoch": 0.6743335345499197,
+ "grad_norm": 24888.25390625,
+ "learning_rate": 4.04164434477481e-05,
+ "loss": 0.4166,
+ "step": 130700
+ },
+ {
+ "epoch": 0.6745915045325326,
+ "grad_norm": 22958.189453125,
+ "learning_rate": 4.0380593159410806e-05,
+ "loss": 0.4159,
+ "step": 130750
+ },
+ {
+ "epoch": 0.6748494745151454,
+ "grad_norm": 21863.55859375,
+ "learning_rate": 4.03447480057266e-05,
+ "loss": 0.4142,
+ "step": 130800
+ },
+ {
+ "epoch": 0.6751074444977583,
+ "grad_norm": 23096.375,
+ "learning_rate": 4.030890800582895e-05,
+ "loss": 0.4108,
+ "step": 130850
+ },
+ {
+ "epoch": 0.675365414480371,
+ "grad_norm": 23506.576171875,
+ "learning_rate": 4.027307317884854e-05,
+ "loss": 0.4111,
+ "step": 130900
+ },
+ {
+ "epoch": 0.6756233844629839,
+ "grad_norm": 26913.11328125,
+ "learning_rate": 4.023724354391331e-05,
+ "loss": 0.4145,
+ "step": 130950
+ },
+ {
+ "epoch": 0.6758813544455967,
+ "grad_norm": 22008.958984375,
+ "learning_rate": 4.020141912014846e-05,
+ "loss": 0.4118,
+ "step": 131000
+ },
+ {
+ "epoch": 0.6761393244282096,
+ "grad_norm": 21431.857421875,
+ "learning_rate": 4.016559992667632e-05,
+ "loss": 0.417,
+ "step": 131050
+ },
+ {
+ "epoch": 0.6763972944108223,
+ "grad_norm": 24077.453125,
+ "learning_rate": 4.0129785982616524e-05,
+ "loss": 0.4121,
+ "step": 131100
+ },
+ {
+ "epoch": 0.6766552643934352,
+ "grad_norm": 22978.5390625,
+ "learning_rate": 4.009397730708583e-05,
+ "loss": 0.4074,
+ "step": 131150
+ },
+ {
+ "epoch": 0.676913234376048,
+ "grad_norm": 25474.740234375,
+ "learning_rate": 4.005817391919826e-05,
+ "loss": 0.4159,
+ "step": 131200
+ },
+ {
+ "epoch": 0.6771712043586608,
+ "grad_norm": 23532.416015625,
+ "learning_rate": 4.0022375838064904e-05,
+ "loss": 0.4202,
+ "step": 131250
+ },
+ {
+ "epoch": 0.6774291743412737,
+ "grad_norm": 23746.072265625,
+ "learning_rate": 3.998658308279414e-05,
+ "loss": 0.4157,
+ "step": 131300
+ },
+ {
+ "epoch": 0.6776871443238864,
+ "grad_norm": 21691.6875,
+ "learning_rate": 3.995079567249142e-05,
+ "loss": 0.4158,
+ "step": 131350
+ },
+ {
+ "epoch": 0.6779451143064993,
+ "grad_norm": 24167.923828125,
+ "learning_rate": 3.991501362625937e-05,
+ "loss": 0.4165,
+ "step": 131400
+ },
+ {
+ "epoch": 0.6782030842891121,
+ "grad_norm": 22420.27734375,
+ "learning_rate": 3.9879236963197784e-05,
+ "loss": 0.418,
+ "step": 131450
+ },
+ {
+ "epoch": 0.678461054271725,
+ "grad_norm": 22116.75,
+ "learning_rate": 3.984346570240352e-05,
+ "loss": 0.4152,
+ "step": 131500
+ },
+ {
+ "epoch": 0.6787190242543377,
+ "grad_norm": 23841.001953125,
+ "learning_rate": 3.9807699862970596e-05,
+ "loss": 0.4179,
+ "step": 131550
+ },
+ {
+ "epoch": 0.6789769942369506,
+ "grad_norm": 22931.126953125,
+ "learning_rate": 3.977193946399011e-05,
+ "loss": 0.4171,
+ "step": 131600
+ },
+ {
+ "epoch": 0.6792349642195634,
+ "grad_norm": 24939.294921875,
+ "learning_rate": 3.973618452455031e-05,
+ "loss": 0.4147,
+ "step": 131650
+ },
+ {
+ "epoch": 0.6794929342021763,
+ "grad_norm": 22026.615234375,
+ "learning_rate": 3.970043506373644e-05,
+ "loss": 0.4084,
+ "step": 131700
+ },
+ {
+ "epoch": 0.679750904184789,
+ "grad_norm": 24636.595703125,
+ "learning_rate": 3.9664691100630904e-05,
+ "loss": 0.4137,
+ "step": 131750
+ },
+ {
+ "epoch": 0.6800088741674019,
+ "grad_norm": 25599.443359375,
+ "learning_rate": 3.962895265431311e-05,
+ "loss": 0.4167,
+ "step": 131800
+ },
+ {
+ "epoch": 0.6802668441500147,
+ "grad_norm": 23514.0078125,
+ "learning_rate": 3.9593219743859575e-05,
+ "loss": 0.408,
+ "step": 131850
+ },
+ {
+ "epoch": 0.6805248141326276,
+ "grad_norm": 21798.9609375,
+ "learning_rate": 3.9557492388343844e-05,
+ "loss": 0.4129,
+ "step": 131900
+ },
+ {
+ "epoch": 0.6807827841152404,
+ "grad_norm": 24803.248046875,
+ "learning_rate": 3.952177060683644e-05,
+ "loss": 0.4126,
+ "step": 131950
+ },
+ {
+ "epoch": 0.6810407540978531,
+ "grad_norm": 23215.529296875,
+ "learning_rate": 3.948605441840501e-05,
+ "loss": 0.4114,
+ "step": 132000
+ },
+ {
+ "epoch": 0.681298724080466,
+ "grad_norm": 21179.626953125,
+ "learning_rate": 3.945034384211412e-05,
+ "loss": 0.4139,
+ "step": 132050
+ },
+ {
+ "epoch": 0.6815566940630788,
+ "grad_norm": 22894.04296875,
+ "learning_rate": 3.941463889702543e-05,
+ "loss": 0.4144,
+ "step": 132100
+ },
+ {
+ "epoch": 0.6818146640456917,
+ "grad_norm": 22581.392578125,
+ "learning_rate": 3.937893960219751e-05,
+ "loss": 0.4163,
+ "step": 132150
+ },
+ {
+ "epoch": 0.6820726340283044,
+ "grad_norm": 27557.634765625,
+ "learning_rate": 3.9343245976685966e-05,
+ "loss": 0.4194,
+ "step": 132200
+ },
+ {
+ "epoch": 0.6823306040109173,
+ "grad_norm": 24157.97265625,
+ "learning_rate": 3.9307558039543355e-05,
+ "loss": 0.4089,
+ "step": 132250
+ },
+ {
+ "epoch": 0.6825885739935301,
+ "grad_norm": 23363.904296875,
+ "learning_rate": 3.927187580981922e-05,
+ "loss": 0.4108,
+ "step": 132300
+ },
+ {
+ "epoch": 0.682846543976143,
+ "grad_norm": 24005.15625,
+ "learning_rate": 3.9236199306560054e-05,
+ "loss": 0.4103,
+ "step": 132350
+ },
+ {
+ "epoch": 0.6831045139587557,
+ "grad_norm": 23476.4609375,
+ "learning_rate": 3.920052854880925e-05,
+ "loss": 0.4189,
+ "step": 132400
+ },
+ {
+ "epoch": 0.6833624839413686,
+ "grad_norm": 23734.173828125,
+ "learning_rate": 3.91648635556072e-05,
+ "loss": 0.4183,
+ "step": 132450
+ },
+ {
+ "epoch": 0.6836204539239814,
+ "grad_norm": 22112.642578125,
+ "learning_rate": 3.912920434599117e-05,
+ "loss": 0.4139,
+ "step": 132500
+ },
+ {
+ "epoch": 0.6838784239065943,
+ "grad_norm": 23442.96484375,
+ "learning_rate": 3.909355093899537e-05,
+ "loss": 0.4137,
+ "step": 132550
+ },
+ {
+ "epoch": 0.6841363938892071,
+ "grad_norm": 22873.734375,
+ "learning_rate": 3.905790335365087e-05,
+ "loss": 0.4097,
+ "step": 132600
+ },
+ {
+ "epoch": 0.6843943638718198,
+ "grad_norm": 24382.9140625,
+ "learning_rate": 3.902226160898567e-05,
+ "loss": 0.4134,
+ "step": 132650
+ },
+ {
+ "epoch": 0.6846523338544327,
+ "grad_norm": 23238.1953125,
+ "learning_rate": 3.898662572402468e-05,
+ "loss": 0.4137,
+ "step": 132700
+ },
+ {
+ "epoch": 0.6849103038370455,
+ "grad_norm": 21690.37890625,
+ "learning_rate": 3.89509957177896e-05,
+ "loss": 0.4114,
+ "step": 132750
+ },
+ {
+ "epoch": 0.6851682738196584,
+ "grad_norm": 25762.189453125,
+ "learning_rate": 3.891537160929907e-05,
+ "loss": 0.4134,
+ "step": 132800
+ },
+ {
+ "epoch": 0.6854262438022711,
+ "grad_norm": 22006.044921875,
+ "learning_rate": 3.88797534175685e-05,
+ "loss": 0.4132,
+ "step": 132850
+ },
+ {
+ "epoch": 0.685684213784884,
+ "grad_norm": 22149.5546875,
+ "learning_rate": 3.8844141161610256e-05,
+ "loss": 0.4154,
+ "step": 132900
+ },
+ {
+ "epoch": 0.6859421837674968,
+ "grad_norm": 23865.419921875,
+ "learning_rate": 3.880853486043343e-05,
+ "loss": 0.4135,
+ "step": 132950
+ },
+ {
+ "epoch": 0.6862001537501097,
+ "grad_norm": 22708.126953125,
+ "learning_rate": 3.877293453304399e-05,
+ "loss": 0.4143,
+ "step": 133000
+ },
+ {
+ "epoch": 0.6864581237327224,
+ "grad_norm": 19948.517578125,
+ "learning_rate": 3.8737340198444683e-05,
+ "loss": 0.4181,
+ "step": 133050
+ },
+ {
+ "epoch": 0.6867160937153353,
+ "grad_norm": 22594.826171875,
+ "learning_rate": 3.870175187563509e-05,
+ "loss": 0.4108,
+ "step": 133100
+ },
+ {
+ "epoch": 0.6869740636979481,
+ "grad_norm": 24876.56640625,
+ "learning_rate": 3.866616958361159e-05,
+ "loss": 0.4136,
+ "step": 133150
+ },
+ {
+ "epoch": 0.687232033680561,
+ "grad_norm": 20055.0859375,
+ "learning_rate": 3.8630593341367285e-05,
+ "loss": 0.4176,
+ "step": 133200
+ },
+ {
+ "epoch": 0.6874900036631737,
+ "grad_norm": 24807.9140625,
+ "learning_rate": 3.8595023167892096e-05,
+ "loss": 0.4084,
+ "step": 133250
+ },
+ {
+ "epoch": 0.6877479736457865,
+ "grad_norm": 21060.78125,
+ "learning_rate": 3.8559459082172696e-05,
+ "loss": 0.4086,
+ "step": 133300
+ },
+ {
+ "epoch": 0.6880059436283994,
+ "grad_norm": 22740.255859375,
+ "learning_rate": 3.852390110319252e-05,
+ "loss": 0.4109,
+ "step": 133350
+ },
+ {
+ "epoch": 0.6882639136110122,
+ "grad_norm": 24095.68359375,
+ "learning_rate": 3.848834924993169e-05,
+ "loss": 0.4118,
+ "step": 133400
+ },
+ {
+ "epoch": 0.6885218835936251,
+ "grad_norm": 20011.78125,
+ "learning_rate": 3.8452803541367136e-05,
+ "loss": 0.4133,
+ "step": 133450
+ },
+ {
+ "epoch": 0.6887798535762378,
+ "grad_norm": 21369.7265625,
+ "learning_rate": 3.8417263996472444e-05,
+ "loss": 0.4104,
+ "step": 133500
+ },
+ {
+ "epoch": 0.6890378235588507,
+ "grad_norm": 22532.251953125,
+ "learning_rate": 3.8381730634217946e-05,
+ "loss": 0.415,
+ "step": 133550
+ },
+ {
+ "epoch": 0.6892957935414635,
+ "grad_norm": 21174.34765625,
+ "learning_rate": 3.8346203473570677e-05,
+ "loss": 0.4121,
+ "step": 133600
+ },
+ {
+ "epoch": 0.6895537635240764,
+ "grad_norm": 21758.87109375,
+ "learning_rate": 3.831068253349431e-05,
+ "loss": 0.4181,
+ "step": 133650
+ },
+ {
+ "epoch": 0.6898117335066891,
+ "grad_norm": 21809.083984375,
+ "learning_rate": 3.827516783294927e-05,
+ "loss": 0.41,
+ "step": 133700
+ },
+ {
+ "epoch": 0.690069703489302,
+ "grad_norm": 21419.69921875,
+ "learning_rate": 3.8239659390892593e-05,
+ "loss": 0.4166,
+ "step": 133750
+ },
+ {
+ "epoch": 0.6903276734719148,
+ "grad_norm": 20746.517578125,
+ "learning_rate": 3.820415722627802e-05,
+ "loss": 0.4168,
+ "step": 133800
+ },
+ {
+ "epoch": 0.6905856434545277,
+ "grad_norm": 22737.89453125,
+ "learning_rate": 3.816866135805589e-05,
+ "loss": 0.4119,
+ "step": 133850
+ },
+ {
+ "epoch": 0.6908436134371404,
+ "grad_norm": 23691.408203125,
+ "learning_rate": 3.813317180517324e-05,
+ "loss": 0.4105,
+ "step": 133900
+ },
+ {
+ "epoch": 0.6911015834197533,
+ "grad_norm": 22899.70703125,
+ "learning_rate": 3.8097688586573684e-05,
+ "loss": 0.412,
+ "step": 133950
+ },
+ {
+ "epoch": 0.6913595534023661,
+ "grad_norm": 25553.763671875,
+ "learning_rate": 3.8062211721197475e-05,
+ "loss": 0.4158,
+ "step": 134000
+ },
+ {
+ "epoch": 0.691617523384979,
+ "grad_norm": 22099.93359375,
+ "learning_rate": 3.802674122798152e-05,
+ "loss": 0.4149,
+ "step": 134050
+ },
+ {
+ "epoch": 0.6918754933675918,
+ "grad_norm": 25735.91015625,
+ "learning_rate": 3.799127712585922e-05,
+ "loss": 0.4058,
+ "step": 134100
+ },
+ {
+ "epoch": 0.6921334633502045,
+ "grad_norm": 21259.95703125,
+ "learning_rate": 3.795581943376067e-05,
+ "loss": 0.4192,
+ "step": 134150
+ },
+ {
+ "epoch": 0.6923914333328174,
+ "grad_norm": 22438.23046875,
+ "learning_rate": 3.7920368170612476e-05,
+ "loss": 0.414,
+ "step": 134200
+ },
+ {
+ "epoch": 0.6926494033154302,
+ "grad_norm": 24721.974609375,
+ "learning_rate": 3.788492335533786e-05,
+ "loss": 0.4154,
+ "step": 134250
+ },
+ {
+ "epoch": 0.6929073732980431,
+ "grad_norm": 24267.611328125,
+ "learning_rate": 3.7849485006856545e-05,
+ "loss": 0.4108,
+ "step": 134300
+ },
+ {
+ "epoch": 0.6931653432806558,
+ "grad_norm": 25588.193359375,
+ "learning_rate": 3.781405314408486e-05,
+ "loss": 0.4169,
+ "step": 134350
+ },
+ {
+ "epoch": 0.6934233132632687,
+ "grad_norm": 22651.216796875,
+ "learning_rate": 3.7778627785935626e-05,
+ "loss": 0.4112,
+ "step": 134400
+ },
+ {
+ "epoch": 0.6936812832458815,
+ "grad_norm": 24765.76953125,
+ "learning_rate": 3.774320895131823e-05,
+ "loss": 0.4173,
+ "step": 134450
+ },
+ {
+ "epoch": 0.6939392532284944,
+ "grad_norm": 25384.44921875,
+ "learning_rate": 3.7707796659138584e-05,
+ "loss": 0.4097,
+ "step": 134500
+ },
+ {
+ "epoch": 0.6941972232111071,
+ "grad_norm": 21145.587890625,
+ "learning_rate": 3.767239092829903e-05,
+ "loss": 0.4125,
+ "step": 134550
+ },
+ {
+ "epoch": 0.69445519319372,
+ "grad_norm": 22693.28515625,
+ "learning_rate": 3.763699177769849e-05,
+ "loss": 0.4111,
+ "step": 134600
+ },
+ {
+ "epoch": 0.6947131631763328,
+ "grad_norm": 20415.33984375,
+ "learning_rate": 3.760159922623235e-05,
+ "loss": 0.4178,
+ "step": 134650
+ },
+ {
+ "epoch": 0.6949711331589457,
+ "grad_norm": 23304.33984375,
+ "learning_rate": 3.756621329279247e-05,
+ "loss": 0.4142,
+ "step": 134700
+ },
+ {
+ "epoch": 0.6952291031415585,
+ "grad_norm": 22485.029296875,
+ "learning_rate": 3.7530833996267156e-05,
+ "loss": 0.4129,
+ "step": 134750
+ },
+ {
+ "epoch": 0.6954870731241712,
+ "grad_norm": 20506.5625,
+ "learning_rate": 3.7495461355541206e-05,
+ "loss": 0.4104,
+ "step": 134800
+ },
+ {
+ "epoch": 0.6957450431067841,
+ "grad_norm": 26106.26953125,
+ "learning_rate": 3.746009538949584e-05,
+ "loss": 0.4122,
+ "step": 134850
+ },
+ {
+ "epoch": 0.6960030130893969,
+ "grad_norm": 25230.55859375,
+ "learning_rate": 3.742473611700874e-05,
+ "loss": 0.4173,
+ "step": 134900
+ },
+ {
+ "epoch": 0.6962609830720098,
+ "grad_norm": 23462.197265625,
+ "learning_rate": 3.738938355695402e-05,
+ "loss": 0.4211,
+ "step": 134950
+ },
+ {
+ "epoch": 0.6965189530546225,
+ "grad_norm": 22550.8359375,
+ "learning_rate": 3.735403772820213e-05,
+ "loss": 0.4154,
+ "step": 135000
+ },
+ {
+ "epoch": 0.6965189530546225,
+ "eval_loss": 0.399837851524353,
+ "eval_runtime": 3136.0222,
+ "eval_samples_per_second": 988.871,
+ "eval_steps_per_second": 1.931,
+ "step": 135000
+ },
+ {
+ "epoch": 0.6967769230372354,
+ "grad_norm": 22235.15234375,
+ "learning_rate": 3.731869864962004e-05,
+ "loss": 0.4183,
+ "step": 135050
+ },
+ {
+ "epoch": 0.6970348930198482,
+ "grad_norm": 21969.208984375,
+ "learning_rate": 3.728336634007105e-05,
+ "loss": 0.41,
+ "step": 135100
+ },
+ {
+ "epoch": 0.6972928630024611,
+ "grad_norm": 22907.32421875,
+ "learning_rate": 3.724804081841488e-05,
+ "loss": 0.4213,
+ "step": 135150
+ },
+ {
+ "epoch": 0.6975508329850738,
+ "grad_norm": 22994.646484375,
+ "learning_rate": 3.721272210350757e-05,
+ "loss": 0.4103,
+ "step": 135200
+ },
+ {
+ "epoch": 0.6978088029676867,
+ "grad_norm": 22118.224609375,
+ "learning_rate": 3.717741021420162e-05,
+ "loss": 0.4195,
+ "step": 135250
+ },
+ {
+ "epoch": 0.6980667729502995,
+ "grad_norm": 19673.6484375,
+ "learning_rate": 3.7142105169345764e-05,
+ "loss": 0.4105,
+ "step": 135300
+ },
+ {
+ "epoch": 0.6983247429329124,
+ "grad_norm": 23110.041015625,
+ "learning_rate": 3.71068069877852e-05,
+ "loss": 0.4132,
+ "step": 135350
+ },
+ {
+ "epoch": 0.6985827129155251,
+ "grad_norm": 26589.453125,
+ "learning_rate": 3.707151568836144e-05,
+ "loss": 0.4171,
+ "step": 135400
+ },
+ {
+ "epoch": 0.6988406828981379,
+ "grad_norm": 25272.74609375,
+ "learning_rate": 3.7036231289912206e-05,
+ "loss": 0.4098,
+ "step": 135450
+ },
+ {
+ "epoch": 0.6990986528807508,
+ "grad_norm": 23238.626953125,
+ "learning_rate": 3.700095381127172e-05,
+ "loss": 0.4102,
+ "step": 135500
+ },
+ {
+ "epoch": 0.6993566228633636,
+ "grad_norm": 25412.8203125,
+ "learning_rate": 3.696568327127036e-05,
+ "loss": 0.4131,
+ "step": 135550
+ },
+ {
+ "epoch": 0.6996145928459765,
+ "grad_norm": 22329.0703125,
+ "learning_rate": 3.693041968873488e-05,
+ "loss": 0.4196,
+ "step": 135600
+ },
+ {
+ "epoch": 0.6998725628285892,
+ "grad_norm": 23497.068359375,
+ "learning_rate": 3.6895163082488294e-05,
+ "loss": 0.4137,
+ "step": 135650
+ },
+ {
+ "epoch": 0.7001305328112021,
+ "grad_norm": 23415.0859375,
+ "learning_rate": 3.6859913471349906e-05,
+ "loss": 0.4088,
+ "step": 135700
+ },
+ {
+ "epoch": 0.7003885027938149,
+ "grad_norm": 24474.064453125,
+ "learning_rate": 3.682467087413525e-05,
+ "loss": 0.4122,
+ "step": 135750
+ },
+ {
+ "epoch": 0.7006464727764278,
+ "grad_norm": 24427.3359375,
+ "learning_rate": 3.678943530965615e-05,
+ "loss": 0.4133,
+ "step": 135800
+ },
+ {
+ "epoch": 0.7009044427590405,
+ "grad_norm": 24399.58203125,
+ "learning_rate": 3.675420679672068e-05,
+ "loss": 0.4113,
+ "step": 135850
+ },
+ {
+ "epoch": 0.7011624127416534,
+ "grad_norm": 22070.033203125,
+ "learning_rate": 3.671898535413313e-05,
+ "loss": 0.4099,
+ "step": 135900
+ },
+ {
+ "epoch": 0.7014203827242662,
+ "grad_norm": 21846.20703125,
+ "learning_rate": 3.668377100069404e-05,
+ "loss": 0.4164,
+ "step": 135950
+ },
+ {
+ "epoch": 0.7016783527068791,
+ "grad_norm": 21927.2265625,
+ "learning_rate": 3.664856375520012e-05,
+ "loss": 0.4124,
+ "step": 136000
+ },
+ {
+ "epoch": 0.7019363226894918,
+ "grad_norm": 22155.341796875,
+ "learning_rate": 3.6613363636444344e-05,
+ "loss": 0.416,
+ "step": 136050
+ },
+ {
+ "epoch": 0.7021942926721046,
+ "grad_norm": 23344.486328125,
+ "learning_rate": 3.6578170663215826e-05,
+ "loss": 0.4162,
+ "step": 136100
+ },
+ {
+ "epoch": 0.7024522626547175,
+ "grad_norm": 23390.642578125,
+ "learning_rate": 3.6542984854299936e-05,
+ "loss": 0.4082,
+ "step": 136150
+ },
+ {
+ "epoch": 0.7027102326373303,
+ "grad_norm": 22980.90625,
+ "learning_rate": 3.6507806228478125e-05,
+ "loss": 0.4067,
+ "step": 136200
+ },
+ {
+ "epoch": 0.7029682026199432,
+ "grad_norm": 22321.662109375,
+ "learning_rate": 3.6472634804528095e-05,
+ "loss": 0.4129,
+ "step": 136250
+ },
+ {
+ "epoch": 0.7032261726025559,
+ "grad_norm": 22719.455078125,
+ "learning_rate": 3.643747060122366e-05,
+ "loss": 0.4169,
+ "step": 136300
+ },
+ {
+ "epoch": 0.7034841425851688,
+ "grad_norm": 25283.494140625,
+ "learning_rate": 3.640231363733481e-05,
+ "loss": 0.4081,
+ "step": 136350
+ },
+ {
+ "epoch": 0.7037421125677816,
+ "grad_norm": 24430.919921875,
+ "learning_rate": 3.636716393162764e-05,
+ "loss": 0.4187,
+ "step": 136400
+ },
+ {
+ "epoch": 0.7040000825503945,
+ "grad_norm": 23372.662109375,
+ "learning_rate": 3.633202150286435e-05,
+ "loss": 0.4117,
+ "step": 136450
+ },
+ {
+ "epoch": 0.7042580525330072,
+ "grad_norm": 23912.595703125,
+ "learning_rate": 3.6296886369803346e-05,
+ "loss": 0.4126,
+ "step": 136500
+ },
+ {
+ "epoch": 0.7045160225156201,
+ "grad_norm": 24092.0390625,
+ "learning_rate": 3.626175855119903e-05,
+ "loss": 0.4163,
+ "step": 136550
+ },
+ {
+ "epoch": 0.7047739924982329,
+ "grad_norm": 23452.2421875,
+ "learning_rate": 3.6226638065802e-05,
+ "loss": 0.4088,
+ "step": 136600
+ },
+ {
+ "epoch": 0.7050319624808458,
+ "grad_norm": 24399.787109375,
+ "learning_rate": 3.6191524932358845e-05,
+ "loss": 0.4139,
+ "step": 136650
+ },
+ {
+ "epoch": 0.7052899324634585,
+ "grad_norm": 23295.599609375,
+ "learning_rate": 3.6156419169612287e-05,
+ "loss": 0.4112,
+ "step": 136700
+ },
+ {
+ "epoch": 0.7055479024460714,
+ "grad_norm": 25809.876953125,
+ "learning_rate": 3.6121320796301126e-05,
+ "loss": 0.4141,
+ "step": 136750
+ },
+ {
+ "epoch": 0.7058058724286842,
+ "grad_norm": 21679.818359375,
+ "learning_rate": 3.608622983116018e-05,
+ "loss": 0.4183,
+ "step": 136800
+ },
+ {
+ "epoch": 0.706063842411297,
+ "grad_norm": 24492.578125,
+ "learning_rate": 3.6051146292920334e-05,
+ "loss": 0.4103,
+ "step": 136850
+ },
+ {
+ "epoch": 0.7063218123939099,
+ "grad_norm": 24805.59375,
+ "learning_rate": 3.601607020030847e-05,
+ "loss": 0.4129,
+ "step": 136900
+ },
+ {
+ "epoch": 0.7065797823765226,
+ "grad_norm": 23000.9765625,
+ "learning_rate": 3.5981001572047566e-05,
+ "loss": 0.4091,
+ "step": 136950
+ },
+ {
+ "epoch": 0.7068377523591355,
+ "grad_norm": 24590.6875,
+ "learning_rate": 3.594594042685655e-05,
+ "loss": 0.4061,
+ "step": 137000
+ },
+ {
+ "epoch": 0.7070957223417483,
+ "grad_norm": 22223.16015625,
+ "learning_rate": 3.5910886783450416e-05,
+ "loss": 0.4174,
+ "step": 137050
+ },
+ {
+ "epoch": 0.7073536923243612,
+ "grad_norm": 28207.7578125,
+ "learning_rate": 3.587584066054007e-05,
+ "loss": 0.4119,
+ "step": 137100
+ },
+ {
+ "epoch": 0.7076116623069739,
+ "grad_norm": 23703.271484375,
+ "learning_rate": 3.584080207683249e-05,
+ "loss": 0.4104,
+ "step": 137150
+ },
+ {
+ "epoch": 0.7078696322895868,
+ "grad_norm": 24903.92578125,
+ "learning_rate": 3.580577105103059e-05,
+ "loss": 0.4139,
+ "step": 137200
+ },
+ {
+ "epoch": 0.7081276022721996,
+ "grad_norm": 21130.029296875,
+ "learning_rate": 3.5770747601833235e-05,
+ "loss": 0.4208,
+ "step": 137250
+ },
+ {
+ "epoch": 0.7083855722548125,
+ "grad_norm": 22223.611328125,
+ "learning_rate": 3.5735731747935306e-05,
+ "loss": 0.4118,
+ "step": 137300
+ },
+ {
+ "epoch": 0.7086435422374252,
+ "grad_norm": 21862.12109375,
+ "learning_rate": 3.570072350802753e-05,
+ "loss": 0.4101,
+ "step": 137350
+ },
+ {
+ "epoch": 0.708901512220038,
+ "grad_norm": 22504.25390625,
+ "learning_rate": 3.566572290079667e-05,
+ "loss": 0.4187,
+ "step": 137400
+ },
+ {
+ "epoch": 0.7091594822026509,
+ "grad_norm": 21898.53125,
+ "learning_rate": 3.563072994492535e-05,
+ "loss": 0.4068,
+ "step": 137450
+ },
+ {
+ "epoch": 0.7094174521852638,
+ "grad_norm": 21629.5859375,
+ "learning_rate": 3.559574465909215e-05,
+ "loss": 0.4107,
+ "step": 137500
+ },
+ {
+ "epoch": 0.7096754221678765,
+ "grad_norm": 23078.080078125,
+ "learning_rate": 3.5560767061971515e-05,
+ "loss": 0.4093,
+ "step": 137550
+ },
+ {
+ "epoch": 0.7099333921504893,
+ "grad_norm": 21831.11328125,
+ "learning_rate": 3.5525797172233826e-05,
+ "loss": 0.4083,
+ "step": 137600
+ },
+ {
+ "epoch": 0.7101913621331022,
+ "grad_norm": 20934.220703125,
+ "learning_rate": 3.5490835008545334e-05,
+ "loss": 0.4143,
+ "step": 137650
+ },
+ {
+ "epoch": 0.710449332115715,
+ "grad_norm": 21335.0,
+ "learning_rate": 3.545588058956816e-05,
+ "loss": 0.4104,
+ "step": 137700
+ },
+ {
+ "epoch": 0.7107073020983279,
+ "grad_norm": 20424.279296875,
+ "learning_rate": 3.542093393396031e-05,
+ "loss": 0.4117,
+ "step": 137750
+ },
+ {
+ "epoch": 0.7109652720809406,
+ "grad_norm": 24527.76171875,
+ "learning_rate": 3.5385995060375596e-05,
+ "loss": 0.4128,
+ "step": 137800
+ },
+ {
+ "epoch": 0.7112232420635535,
+ "grad_norm": 23370.17578125,
+ "learning_rate": 3.535106398746376e-05,
+ "loss": 0.4149,
+ "step": 137850
+ },
+ {
+ "epoch": 0.7114812120461663,
+ "grad_norm": 22996.2890625,
+ "learning_rate": 3.531614073387028e-05,
+ "loss": 0.412,
+ "step": 137900
+ },
+ {
+ "epoch": 0.7117391820287792,
+ "grad_norm": 26592.931640625,
+ "learning_rate": 3.528122531823657e-05,
+ "loss": 0.4111,
+ "step": 137950
+ },
+ {
+ "epoch": 0.7119971520113919,
+ "grad_norm": 22353.35546875,
+ "learning_rate": 3.5246317759199745e-05,
+ "loss": 0.412,
+ "step": 138000
+ },
+ {
+ "epoch": 0.7122551219940048,
+ "grad_norm": 22266.91796875,
+ "learning_rate": 3.521141807539281e-05,
+ "loss": 0.4113,
+ "step": 138050
+ },
+ {
+ "epoch": 0.7125130919766176,
+ "grad_norm": 21723.318359375,
+ "learning_rate": 3.517652628544457e-05,
+ "loss": 0.4058,
+ "step": 138100
+ },
+ {
+ "epoch": 0.7127710619592305,
+ "grad_norm": 23738.322265625,
+ "learning_rate": 3.5141642407979535e-05,
+ "loss": 0.4072,
+ "step": 138150
+ },
+ {
+ "epoch": 0.7130290319418432,
+ "grad_norm": 25993.587890625,
+ "learning_rate": 3.5106766461618083e-05,
+ "loss": 0.4066,
+ "step": 138200
+ },
+ {
+ "epoch": 0.713287001924456,
+ "grad_norm": 23321.55859375,
+ "learning_rate": 3.50718984649763e-05,
+ "loss": 0.4104,
+ "step": 138250
+ },
+ {
+ "epoch": 0.7135449719070689,
+ "grad_norm": 22022.267578125,
+ "learning_rate": 3.503703843666605e-05,
+ "loss": 0.4096,
+ "step": 138300
+ },
+ {
+ "epoch": 0.7138029418896817,
+ "grad_norm": 22249.640625,
+ "learning_rate": 3.500218639529493e-05,
+ "loss": 0.4121,
+ "step": 138350
+ },
+ {
+ "epoch": 0.7140609118722946,
+ "grad_norm": 21145.283203125,
+ "learning_rate": 3.496734235946632e-05,
+ "loss": 0.4126,
+ "step": 138400
+ },
+ {
+ "epoch": 0.7143188818549073,
+ "grad_norm": 22439.38671875,
+ "learning_rate": 3.493250634777924e-05,
+ "loss": 0.4076,
+ "step": 138450
+ },
+ {
+ "epoch": 0.7145768518375202,
+ "grad_norm": 25641.93359375,
+ "learning_rate": 3.4897678378828516e-05,
+ "loss": 0.4105,
+ "step": 138500
+ },
+ {
+ "epoch": 0.714834821820133,
+ "grad_norm": 22200.46875,
+ "learning_rate": 3.486285847120465e-05,
+ "loss": 0.4097,
+ "step": 138550
+ },
+ {
+ "epoch": 0.7150927918027459,
+ "grad_norm": 22691.666015625,
+ "learning_rate": 3.482804664349381e-05,
+ "loss": 0.4154,
+ "step": 138600
+ },
+ {
+ "epoch": 0.7153507617853586,
+ "grad_norm": 22139.16796875,
+ "learning_rate": 3.479324291427788e-05,
+ "loss": 0.4124,
+ "step": 138650
+ },
+ {
+ "epoch": 0.7156087317679715,
+ "grad_norm": 23695.7578125,
+ "learning_rate": 3.4758447302134414e-05,
+ "loss": 0.4174,
+ "step": 138700
+ },
+ {
+ "epoch": 0.7158667017505843,
+ "grad_norm": 24720.06640625,
+ "learning_rate": 3.472365982563666e-05,
+ "loss": 0.4095,
+ "step": 138750
+ },
+ {
+ "epoch": 0.7161246717331972,
+ "grad_norm": 22861.171875,
+ "learning_rate": 3.4688880503353474e-05,
+ "loss": 0.4039,
+ "step": 138800
+ },
+ {
+ "epoch": 0.7163826417158099,
+ "grad_norm": 22751.833984375,
+ "learning_rate": 3.465410935384939e-05,
+ "loss": 0.4175,
+ "step": 138850
+ },
+ {
+ "epoch": 0.7166406116984227,
+ "grad_norm": 22689.5,
+ "learning_rate": 3.461934639568457e-05,
+ "loss": 0.4133,
+ "step": 138900
+ },
+ {
+ "epoch": 0.7168985816810356,
+ "grad_norm": 23292.1328125,
+ "learning_rate": 3.458459164741482e-05,
+ "loss": 0.4062,
+ "step": 138950
+ },
+ {
+ "epoch": 0.7171565516636484,
+ "grad_norm": 22390.515625,
+ "learning_rate": 3.4549845127591563e-05,
+ "loss": 0.4169,
+ "step": 139000
+ },
+ {
+ "epoch": 0.7174145216462613,
+ "grad_norm": 23531.9921875,
+ "learning_rate": 3.451510685476178e-05,
+ "loss": 0.4084,
+ "step": 139050
+ },
+ {
+ "epoch": 0.717672491628874,
+ "grad_norm": 23847.154296875,
+ "learning_rate": 3.448037684746812e-05,
+ "loss": 0.4134,
+ "step": 139100
+ },
+ {
+ "epoch": 0.7179304616114869,
+ "grad_norm": 22651.15234375,
+ "learning_rate": 3.4445655124248774e-05,
+ "loss": 0.4118,
+ "step": 139150
+ },
+ {
+ "epoch": 0.7181884315940997,
+ "grad_norm": 21893.123046875,
+ "learning_rate": 3.441094170363755e-05,
+ "loss": 0.4065,
+ "step": 139200
+ },
+ {
+ "epoch": 0.7184464015767126,
+ "grad_norm": 22238.685546875,
+ "learning_rate": 3.4376236604163756e-05,
+ "loss": 0.4164,
+ "step": 139250
+ },
+ {
+ "epoch": 0.7187043715593253,
+ "grad_norm": 25605.083984375,
+ "learning_rate": 3.434153984435234e-05,
+ "loss": 0.4105,
+ "step": 139300
+ },
+ {
+ "epoch": 0.7189623415419382,
+ "grad_norm": 22414.0703125,
+ "learning_rate": 3.430685144272374e-05,
+ "loss": 0.4095,
+ "step": 139350
+ },
+ {
+ "epoch": 0.719220311524551,
+ "grad_norm": 22067.443359375,
+ "learning_rate": 3.4272171417793954e-05,
+ "loss": 0.4105,
+ "step": 139400
+ },
+ {
+ "epoch": 0.7194782815071639,
+ "grad_norm": 22398.36328125,
+ "learning_rate": 3.423749978807454e-05,
+ "loss": 0.4065,
+ "step": 139450
+ },
+ {
+ "epoch": 0.7197362514897766,
+ "grad_norm": 25660.017578125,
+ "learning_rate": 3.420283657207248e-05,
+ "loss": 0.4139,
+ "step": 139500
+ },
+ {
+ "epoch": 0.7199942214723895,
+ "grad_norm": 27245.4609375,
+ "learning_rate": 3.416818178829039e-05,
+ "loss": 0.4106,
+ "step": 139550
+ },
+ {
+ "epoch": 0.7202521914550023,
+ "grad_norm": 22430.6484375,
+ "learning_rate": 3.413353545522628e-05,
+ "loss": 0.4103,
+ "step": 139600
+ },
+ {
+ "epoch": 0.7205101614376151,
+ "grad_norm": 25269.876953125,
+ "learning_rate": 3.409889759137373e-05,
+ "loss": 0.4073,
+ "step": 139650
+ },
+ {
+ "epoch": 0.720768131420228,
+ "grad_norm": 22811.275390625,
+ "learning_rate": 3.406426821522172e-05,
+ "loss": 0.4156,
+ "step": 139700
+ },
+ {
+ "epoch": 0.7210261014028407,
+ "grad_norm": 21838.966796875,
+ "learning_rate": 3.402964734525477e-05,
+ "loss": 0.4132,
+ "step": 139750
+ },
+ {
+ "epoch": 0.7212840713854536,
+ "grad_norm": 22130.935546875,
+ "learning_rate": 3.39950349999528e-05,
+ "loss": 0.418,
+ "step": 139800
+ },
+ {
+ "epoch": 0.7215420413680664,
+ "grad_norm": 22744.779296875,
+ "learning_rate": 3.396043119779123e-05,
+ "loss": 0.4098,
+ "step": 139850
+ },
+ {
+ "epoch": 0.7218000113506793,
+ "grad_norm": 22559.07421875,
+ "learning_rate": 3.392583595724093e-05,
+ "loss": 0.4159,
+ "step": 139900
+ },
+ {
+ "epoch": 0.722057981333292,
+ "grad_norm": 20920.349609375,
+ "learning_rate": 3.3891249296768116e-05,
+ "loss": 0.406,
+ "step": 139950
+ },
+ {
+ "epoch": 0.7223159513159049,
+ "grad_norm": 20708.716796875,
+ "learning_rate": 3.38566712348345e-05,
+ "loss": 0.4102,
+ "step": 140000
+ },
+ {
+ "epoch": 0.7223159513159049,
+ "eval_loss": 0.39852654933929443,
+ "eval_runtime": 3128.1309,
+ "eval_samples_per_second": 991.365,
+ "eval_steps_per_second": 1.936,
+ "step": 140000
+ },
+ {
+ "epoch": 0.7225739212985177,
+ "grad_norm": 24440.734375,
+ "learning_rate": 3.382210178989718e-05,
+ "loss": 0.4144,
+ "step": 140050
+ },
+ {
+ "epoch": 0.7228318912811306,
+ "grad_norm": 22715.88671875,
+ "learning_rate": 3.378754098040867e-05,
+ "loss": 0.4146,
+ "step": 140100
+ },
+ {
+ "epoch": 0.7230898612637433,
+ "grad_norm": 23713.474609375,
+ "learning_rate": 3.375298882481683e-05,
+ "loss": 0.4089,
+ "step": 140150
+ },
+ {
+ "epoch": 0.7233478312463562,
+ "grad_norm": 24705.048828125,
+ "learning_rate": 3.371844534156497e-05,
+ "loss": 0.4052,
+ "step": 140200
+ },
+ {
+ "epoch": 0.723605801228969,
+ "grad_norm": 22624.98046875,
+ "learning_rate": 3.368391054909169e-05,
+ "loss": 0.4155,
+ "step": 140250
+ },
+ {
+ "epoch": 0.7238637712115819,
+ "grad_norm": 24774.72265625,
+ "learning_rate": 3.364938446583103e-05,
+ "loss": 0.4058,
+ "step": 140300
+ },
+ {
+ "epoch": 0.7241217411941946,
+ "grad_norm": 24109.02734375,
+ "learning_rate": 3.361486711021235e-05,
+ "loss": 0.4169,
+ "step": 140350
+ },
+ {
+ "epoch": 0.7243797111768074,
+ "grad_norm": 20315.724609375,
+ "learning_rate": 3.3580358500660284e-05,
+ "loss": 0.4135,
+ "step": 140400
+ },
+ {
+ "epoch": 0.7246376811594203,
+ "grad_norm": 26642.84765625,
+ "learning_rate": 3.3545858655594935e-05,
+ "loss": 0.4182,
+ "step": 140450
+ },
+ {
+ "epoch": 0.7248956511420331,
+ "grad_norm": 23466.93359375,
+ "learning_rate": 3.351136759343161e-05,
+ "loss": 0.4098,
+ "step": 140500
+ },
+ {
+ "epoch": 0.725153621124646,
+ "grad_norm": 25247.11328125,
+ "learning_rate": 3.3476885332580985e-05,
+ "loss": 0.4085,
+ "step": 140550
+ },
+ {
+ "epoch": 0.7254115911072587,
+ "grad_norm": 25220.11328125,
+ "learning_rate": 3.3442411891449e-05,
+ "loss": 0.4139,
+ "step": 140600
+ },
+ {
+ "epoch": 0.7256695610898716,
+ "grad_norm": 21836.095703125,
+ "learning_rate": 3.3407947288436936e-05,
+ "loss": 0.4127,
+ "step": 140650
+ },
+ {
+ "epoch": 0.7259275310724844,
+ "grad_norm": 22301.443359375,
+ "learning_rate": 3.3373491541941346e-05,
+ "loss": 0.4127,
+ "step": 140700
+ },
+ {
+ "epoch": 0.7261855010550973,
+ "grad_norm": 21902.615234375,
+ "learning_rate": 3.333904467035399e-05,
+ "loss": 0.4111,
+ "step": 140750
+ },
+ {
+ "epoch": 0.72644347103771,
+ "grad_norm": 21408.71484375,
+ "learning_rate": 3.3304606692061984e-05,
+ "loss": 0.4095,
+ "step": 140800
+ },
+ {
+ "epoch": 0.7267014410203229,
+ "grad_norm": 26146.03515625,
+ "learning_rate": 3.3270177625447626e-05,
+ "loss": 0.4096,
+ "step": 140850
+ },
+ {
+ "epoch": 0.7269594110029357,
+ "grad_norm": 22772.9921875,
+ "learning_rate": 3.323575748888852e-05,
+ "loss": 0.4109,
+ "step": 140900
+ },
+ {
+ "epoch": 0.7272173809855486,
+ "grad_norm": 24654.810546875,
+ "learning_rate": 3.320134630075742e-05,
+ "loss": 0.4135,
+ "step": 140950
+ },
+ {
+ "epoch": 0.7274753509681613,
+ "grad_norm": 23458.103515625,
+ "learning_rate": 3.31669440794224e-05,
+ "loss": 0.4128,
+ "step": 141000
+ },
+ {
+ "epoch": 0.7277333209507741,
+ "grad_norm": 22455.630859375,
+ "learning_rate": 3.3132550843246654e-05,
+ "loss": 0.411,
+ "step": 141050
+ },
+ {
+ "epoch": 0.727991290933387,
+ "grad_norm": 22372.08203125,
+ "learning_rate": 3.3098166610588655e-05,
+ "loss": 0.413,
+ "step": 141100
+ },
+ {
+ "epoch": 0.7282492609159998,
+ "grad_norm": 22878.216796875,
+ "learning_rate": 3.306379139980206e-05,
+ "loss": 0.4054,
+ "step": 141150
+ },
+ {
+ "epoch": 0.7285072308986127,
+ "grad_norm": 22959.708984375,
+ "learning_rate": 3.302942522923563e-05,
+ "loss": 0.4114,
+ "step": 141200
+ },
+ {
+ "epoch": 0.7287652008812254,
+ "grad_norm": 22574.986328125,
+ "learning_rate": 3.2995068117233417e-05,
+ "loss": 0.4105,
+ "step": 141250
+ },
+ {
+ "epoch": 0.7290231708638383,
+ "grad_norm": 23770.279296875,
+ "learning_rate": 3.2960720082134555e-05,
+ "loss": 0.4091,
+ "step": 141300
+ },
+ {
+ "epoch": 0.7292811408464511,
+ "grad_norm": 23017.416015625,
+ "learning_rate": 3.292638114227338e-05,
+ "loss": 0.411,
+ "step": 141350
+ },
+ {
+ "epoch": 0.729539110829064,
+ "grad_norm": 23605.982421875,
+ "learning_rate": 3.289205131597932e-05,
+ "loss": 0.4097,
+ "step": 141400
+ },
+ {
+ "epoch": 0.7297970808116767,
+ "grad_norm": 22409.12890625,
+ "learning_rate": 3.2857730621577006e-05,
+ "loss": 0.4096,
+ "step": 141450
+ },
+ {
+ "epoch": 0.7300550507942896,
+ "grad_norm": 22681.11328125,
+ "learning_rate": 3.282341907738613e-05,
+ "loss": 0.4066,
+ "step": 141500
+ },
+ {
+ "epoch": 0.7303130207769024,
+ "grad_norm": 27188.859375,
+ "learning_rate": 3.278911670172154e-05,
+ "loss": 0.4104,
+ "step": 141550
+ },
+ {
+ "epoch": 0.7305709907595153,
+ "grad_norm": 25134.85546875,
+ "learning_rate": 3.2754823512893225e-05,
+ "loss": 0.4105,
+ "step": 141600
+ },
+ {
+ "epoch": 0.730828960742128,
+ "grad_norm": 21408.478515625,
+ "learning_rate": 3.2720539529206154e-05,
+ "loss": 0.412,
+ "step": 141650
+ },
+ {
+ "epoch": 0.7310869307247408,
+ "grad_norm": 21062.59375,
+ "learning_rate": 3.26862647689605e-05,
+ "loss": 0.411,
+ "step": 141700
+ },
+ {
+ "epoch": 0.7313449007073537,
+ "grad_norm": 21591.23828125,
+ "learning_rate": 3.265199925045143e-05,
+ "loss": 0.4171,
+ "step": 141750
+ },
+ {
+ "epoch": 0.7316028706899665,
+ "grad_norm": 23328.751953125,
+ "learning_rate": 3.261774299196926e-05,
+ "loss": 0.4127,
+ "step": 141800
+ },
+ {
+ "epoch": 0.7318608406725794,
+ "grad_norm": 27247.59375,
+ "learning_rate": 3.258349601179928e-05,
+ "loss": 0.4087,
+ "step": 141850
+ },
+ {
+ "epoch": 0.7321188106551921,
+ "grad_norm": 24500.822265625,
+ "learning_rate": 3.254925832822188e-05,
+ "loss": 0.4015,
+ "step": 141900
+ },
+ {
+ "epoch": 0.732376780637805,
+ "grad_norm": 25855.849609375,
+ "learning_rate": 3.251502995951247e-05,
+ "loss": 0.4125,
+ "step": 141950
+ },
+ {
+ "epoch": 0.7326347506204178,
+ "grad_norm": 23075.234375,
+ "learning_rate": 3.248081092394148e-05,
+ "loss": 0.4112,
+ "step": 142000
+ },
+ {
+ "epoch": 0.7328927206030307,
+ "grad_norm": 25166.712890625,
+ "learning_rate": 3.2446601239774405e-05,
+ "loss": 0.4121,
+ "step": 142050
+ },
+ {
+ "epoch": 0.7331506905856434,
+ "grad_norm": 23327.337890625,
+ "learning_rate": 3.241240092527167e-05,
+ "loss": 0.41,
+ "step": 142100
+ },
+ {
+ "epoch": 0.7334086605682563,
+ "grad_norm": 34138.34375,
+ "learning_rate": 3.237820999868876e-05,
+ "loss": 0.413,
+ "step": 142150
+ },
+ {
+ "epoch": 0.7336666305508691,
+ "grad_norm": 23031.2109375,
+ "learning_rate": 3.234402847827612e-05,
+ "loss": 0.414,
+ "step": 142200
+ },
+ {
+ "epoch": 0.733924600533482,
+ "grad_norm": 23237.44921875,
+ "learning_rate": 3.230985638227921e-05,
+ "loss": 0.4159,
+ "step": 142250
+ },
+ {
+ "epoch": 0.7341825705160947,
+ "grad_norm": 21437.705078125,
+ "learning_rate": 3.2275693728938395e-05,
+ "loss": 0.4078,
+ "step": 142300
+ },
+ {
+ "epoch": 0.7344405404987076,
+ "grad_norm": 23815.9140625,
+ "learning_rate": 3.224154053648906e-05,
+ "loss": 0.4135,
+ "step": 142350
+ },
+ {
+ "epoch": 0.7346985104813204,
+ "grad_norm": 26809.724609375,
+ "learning_rate": 3.2207396823161514e-05,
+ "loss": 0.409,
+ "step": 142400
+ },
+ {
+ "epoch": 0.7349564804639332,
+ "grad_norm": 21905.6484375,
+ "learning_rate": 3.2173262607181e-05,
+ "loss": 0.41,
+ "step": 142450
+ },
+ {
+ "epoch": 0.735214450446546,
+ "grad_norm": 23628.076171875,
+ "learning_rate": 3.2139137906767743e-05,
+ "loss": 0.4175,
+ "step": 142500
+ },
+ {
+ "epoch": 0.7354724204291588,
+ "grad_norm": 24156.837890625,
+ "learning_rate": 3.210502274013679e-05,
+ "loss": 0.4114,
+ "step": 142550
+ },
+ {
+ "epoch": 0.7357303904117717,
+ "grad_norm": 21517.404296875,
+ "learning_rate": 3.207091712549819e-05,
+ "loss": 0.4112,
+ "step": 142600
+ },
+ {
+ "epoch": 0.7359883603943845,
+ "grad_norm": 22684.734375,
+ "learning_rate": 3.203682108105685e-05,
+ "loss": 0.417,
+ "step": 142650
+ },
+ {
+ "epoch": 0.7362463303769974,
+ "grad_norm": 22205.361328125,
+ "learning_rate": 3.2002734625012585e-05,
+ "loss": 0.4111,
+ "step": 142700
+ },
+ {
+ "epoch": 0.7365043003596101,
+ "grad_norm": 21131.41796875,
+ "learning_rate": 3.196865777556008e-05,
+ "loss": 0.4114,
+ "step": 142750
+ },
+ {
+ "epoch": 0.736762270342223,
+ "grad_norm": 23506.66796875,
+ "learning_rate": 3.1934590550888894e-05,
+ "loss": 0.4183,
+ "step": 142800
+ },
+ {
+ "epoch": 0.7370202403248358,
+ "grad_norm": 24435.4609375,
+ "learning_rate": 3.190053296918345e-05,
+ "loss": 0.4166,
+ "step": 142850
+ },
+ {
+ "epoch": 0.7372782103074487,
+ "grad_norm": 22610.4296875,
+ "learning_rate": 3.186648504862303e-05,
+ "loss": 0.4109,
+ "step": 142900
+ },
+ {
+ "epoch": 0.7375361802900614,
+ "grad_norm": 24722.974609375,
+ "learning_rate": 3.183244680738178e-05,
+ "loss": 0.4093,
+ "step": 142950
+ },
+ {
+ "epoch": 0.7377941502726743,
+ "grad_norm": 23927.6640625,
+ "learning_rate": 3.1798418263628596e-05,
+ "loss": 0.4106,
+ "step": 143000
+ },
+ {
+ "epoch": 0.7380521202552871,
+ "grad_norm": 23958.216796875,
+ "learning_rate": 3.176439943552732e-05,
+ "loss": 0.4067,
+ "step": 143050
+ },
+ {
+ "epoch": 0.7383100902379,
+ "grad_norm": 23272.818359375,
+ "learning_rate": 3.1730390341236496e-05,
+ "loss": 0.4086,
+ "step": 143100
+ },
+ {
+ "epoch": 0.7385680602205127,
+ "grad_norm": 20998.751953125,
+ "learning_rate": 3.1696390998909556e-05,
+ "loss": 0.4099,
+ "step": 143150
+ },
+ {
+ "epoch": 0.7388260302031255,
+ "grad_norm": 24493.677734375,
+ "learning_rate": 3.166240142669464e-05,
+ "loss": 0.413,
+ "step": 143200
+ },
+ {
+ "epoch": 0.7390840001857384,
+ "grad_norm": 22639.927734375,
+ "learning_rate": 3.162842164273479e-05,
+ "loss": 0.4105,
+ "step": 143250
+ },
+ {
+ "epoch": 0.7393419701683512,
+ "grad_norm": 24407.029296875,
+ "learning_rate": 3.15944516651677e-05,
+ "loss": 0.4188,
+ "step": 143300
+ },
+ {
+ "epoch": 0.7395999401509641,
+ "grad_norm": 26538.68359375,
+ "learning_rate": 3.156049151212591e-05,
+ "loss": 0.4135,
+ "step": 143350
+ },
+ {
+ "epoch": 0.7398579101335768,
+ "grad_norm": 24519.060546875,
+ "learning_rate": 3.1526541201736695e-05,
+ "loss": 0.4141,
+ "step": 143400
+ },
+ {
+ "epoch": 0.7401158801161897,
+ "grad_norm": 21236.681640625,
+ "learning_rate": 3.149260075212206e-05,
+ "loss": 0.4096,
+ "step": 143450
+ },
+ {
+ "epoch": 0.7403738500988025,
+ "grad_norm": 24463.015625,
+ "learning_rate": 3.1458670181398796e-05,
+ "loss": 0.4035,
+ "step": 143500
+ },
+ {
+ "epoch": 0.7406318200814154,
+ "grad_norm": 26984.408203125,
+ "learning_rate": 3.1424749507678336e-05,
+ "loss": 0.4115,
+ "step": 143550
+ },
+ {
+ "epoch": 0.7408897900640281,
+ "grad_norm": 25619.35546875,
+ "learning_rate": 3.139083874906691e-05,
+ "loss": 0.4131,
+ "step": 143600
+ },
+ {
+ "epoch": 0.741147760046641,
+ "grad_norm": 24277.7890625,
+ "learning_rate": 3.13569379236654e-05,
+ "loss": 0.3994,
+ "step": 143650
+ },
+ {
+ "epoch": 0.7414057300292538,
+ "grad_norm": 24007.654296875,
+ "learning_rate": 3.1323047049569446e-05,
+ "loss": 0.4091,
+ "step": 143700
+ },
+ {
+ "epoch": 0.7416637000118667,
+ "grad_norm": 21688.703125,
+ "learning_rate": 3.12891661448693e-05,
+ "loss": 0.4176,
+ "step": 143750
+ },
+ {
+ "epoch": 0.7419216699944794,
+ "grad_norm": 22735.900390625,
+ "learning_rate": 3.125529522764995e-05,
+ "loss": 0.4091,
+ "step": 143800
+ },
+ {
+ "epoch": 0.7421796399770922,
+ "grad_norm": 23359.259765625,
+ "learning_rate": 3.122143431599105e-05,
+ "loss": 0.4125,
+ "step": 143850
+ },
+ {
+ "epoch": 0.7424376099597051,
+ "grad_norm": 26637.263671875,
+ "learning_rate": 3.118758342796687e-05,
+ "loss": 0.4137,
+ "step": 143900
+ },
+ {
+ "epoch": 0.7426955799423179,
+ "grad_norm": 24977.3984375,
+ "learning_rate": 3.1153742581646406e-05,
+ "loss": 0.4094,
+ "step": 143950
+ },
+ {
+ "epoch": 0.7429535499249308,
+ "grad_norm": 25850.91796875,
+ "learning_rate": 3.111991179509318e-05,
+ "loss": 0.4092,
+ "step": 144000
+ },
+ {
+ "epoch": 0.7432115199075435,
+ "grad_norm": 22823.0625,
+ "learning_rate": 3.1086091086365474e-05,
+ "loss": 0.4111,
+ "step": 144050
+ },
+ {
+ "epoch": 0.7434694898901564,
+ "grad_norm": 24187.640625,
+ "learning_rate": 3.1052280473516076e-05,
+ "loss": 0.414,
+ "step": 144100
+ },
+ {
+ "epoch": 0.7437274598727692,
+ "grad_norm": 21726.537109375,
+ "learning_rate": 3.101847997459249e-05,
+ "loss": 0.4098,
+ "step": 144150
+ },
+ {
+ "epoch": 0.7439854298553821,
+ "grad_norm": 23075.27734375,
+ "learning_rate": 3.098468960763671e-05,
+ "loss": 0.4084,
+ "step": 144200
+ },
+ {
+ "epoch": 0.7442433998379948,
+ "grad_norm": 24207.529296875,
+ "learning_rate": 3.095090939068541e-05,
+ "loss": 0.4156,
+ "step": 144250
+ },
+ {
+ "epoch": 0.7445013698206077,
+ "grad_norm": 25209.39453125,
+ "learning_rate": 3.091713934176982e-05,
+ "loss": 0.4122,
+ "step": 144300
+ },
+ {
+ "epoch": 0.7447593398032205,
+ "grad_norm": 24308.361328125,
+ "learning_rate": 3.088337947891573e-05,
+ "loss": 0.408,
+ "step": 144350
+ },
+ {
+ "epoch": 0.7450173097858334,
+ "grad_norm": 22416.990234375,
+ "learning_rate": 3.0849629820143517e-05,
+ "loss": 0.4136,
+ "step": 144400
+ },
+ {
+ "epoch": 0.7452752797684461,
+ "grad_norm": 24417.943359375,
+ "learning_rate": 3.081589038346806e-05,
+ "loss": 0.4079,
+ "step": 144450
+ },
+ {
+ "epoch": 0.745533249751059,
+ "grad_norm": 21519.650390625,
+ "learning_rate": 3.078216118689885e-05,
+ "loss": 0.4073,
+ "step": 144500
+ },
+ {
+ "epoch": 0.7457912197336718,
+ "grad_norm": 22821.1796875,
+ "learning_rate": 3.074844224843986e-05,
+ "loss": 0.4058,
+ "step": 144550
+ },
+ {
+ "epoch": 0.7460491897162846,
+ "grad_norm": 22559.86328125,
+ "learning_rate": 3.071473358608963e-05,
+ "loss": 0.413,
+ "step": 144600
+ },
+ {
+ "epoch": 0.7463071596988974,
+ "grad_norm": 25803.658203125,
+ "learning_rate": 3.068103521784115e-05,
+ "loss": 0.4077,
+ "step": 144650
+ },
+ {
+ "epoch": 0.7465651296815102,
+ "grad_norm": 27601.787109375,
+ "learning_rate": 3.0647347161681983e-05,
+ "loss": 0.4057,
+ "step": 144700
+ },
+ {
+ "epoch": 0.7468230996641231,
+ "grad_norm": 21363.67578125,
+ "learning_rate": 3.061366943559417e-05,
+ "loss": 0.4082,
+ "step": 144750
+ },
+ {
+ "epoch": 0.7470810696467359,
+ "grad_norm": 24007.3046875,
+ "learning_rate": 3.058000205755421e-05,
+ "loss": 0.4121,
+ "step": 144800
+ },
+ {
+ "epoch": 0.7473390396293488,
+ "grad_norm": 29940.8125,
+ "learning_rate": 3.054634504553312e-05,
+ "loss": 0.4046,
+ "step": 144850
+ },
+ {
+ "epoch": 0.7475970096119615,
+ "grad_norm": 24161.90234375,
+ "learning_rate": 3.0512698417496334e-05,
+ "loss": 0.4108,
+ "step": 144900
+ },
+ {
+ "epoch": 0.7478549795945744,
+ "grad_norm": 22984.619140625,
+ "learning_rate": 3.0479062191403785e-05,
+ "loss": 0.4158,
+ "step": 144950
+ },
+ {
+ "epoch": 0.7481129495771872,
+ "grad_norm": 26418.95703125,
+ "learning_rate": 3.0445436385209836e-05,
+ "loss": 0.4059,
+ "step": 145000
+ },
+ {
+ "epoch": 0.7481129495771872,
+ "eval_loss": 0.3971329629421234,
+ "eval_runtime": 3201.285,
+ "eval_samples_per_second": 968.711,
+ "eval_steps_per_second": 1.892,
+ "step": 145000
+ },
+ {
+ "epoch": 0.7483709195598001,
+ "grad_norm": 22503.525390625,
+ "learning_rate": 3.0411821016863308e-05,
+ "loss": 0.4048,
+ "step": 145050
+ },
+ {
+ "epoch": 0.7486288895424128,
+ "grad_norm": 23114.525390625,
+ "learning_rate": 3.03782161043074e-05,
+ "loss": 0.4111,
+ "step": 145100
+ },
+ {
+ "epoch": 0.7488868595250256,
+ "grad_norm": 23249.794921875,
+ "learning_rate": 3.0344621665479778e-05,
+ "loss": 0.4093,
+ "step": 145150
+ },
+ {
+ "epoch": 0.7491448295076385,
+ "grad_norm": 23568.833984375,
+ "learning_rate": 3.0311037718312518e-05,
+ "loss": 0.4166,
+ "step": 145200
+ },
+ {
+ "epoch": 0.7494027994902513,
+ "grad_norm": 21794.6015625,
+ "learning_rate": 3.027746428073206e-05,
+ "loss": 0.4122,
+ "step": 145250
+ },
+ {
+ "epoch": 0.7496607694728641,
+ "grad_norm": 23710.212890625,
+ "learning_rate": 3.024390137065929e-05,
+ "loss": 0.4066,
+ "step": 145300
+ },
+ {
+ "epoch": 0.7499187394554769,
+ "grad_norm": 23179.240234375,
+ "learning_rate": 3.0210349006009385e-05,
+ "loss": 0.4127,
+ "step": 145350
+ },
+ {
+ "epoch": 0.7501767094380898,
+ "grad_norm": 25111.078125,
+ "learning_rate": 3.017680720469199e-05,
+ "loss": 0.4128,
+ "step": 145400
+ },
+ {
+ "epoch": 0.7504346794207026,
+ "grad_norm": 24289.095703125,
+ "learning_rate": 3.0143275984611042e-05,
+ "loss": 0.4167,
+ "step": 145450
+ },
+ {
+ "epoch": 0.7506926494033155,
+ "grad_norm": 22695.802734375,
+ "learning_rate": 3.0109755363664893e-05,
+ "loss": 0.4135,
+ "step": 145500
+ },
+ {
+ "epoch": 0.7509506193859282,
+ "grad_norm": 26995.833984375,
+ "learning_rate": 3.0076245359746163e-05,
+ "loss": 0.4057,
+ "step": 145550
+ },
+ {
+ "epoch": 0.7512085893685411,
+ "grad_norm": 21887.568359375,
+ "learning_rate": 3.004274599074185e-05,
+ "loss": 0.4089,
+ "step": 145600
+ },
+ {
+ "epoch": 0.7514665593511539,
+ "grad_norm": 27344.78125,
+ "learning_rate": 3.00092572745333e-05,
+ "loss": 0.4156,
+ "step": 145650
+ },
+ {
+ "epoch": 0.7517245293337668,
+ "grad_norm": 25476.15234375,
+ "learning_rate": 2.9975779228996104e-05,
+ "loss": 0.4113,
+ "step": 145700
+ },
+ {
+ "epoch": 0.7519824993163795,
+ "grad_norm": 24602.64453125,
+ "learning_rate": 2.9942311872000215e-05,
+ "loss": 0.4077,
+ "step": 145750
+ },
+ {
+ "epoch": 0.7522404692989924,
+ "grad_norm": 24124.984375,
+ "learning_rate": 2.990885522140985e-05,
+ "loss": 0.4122,
+ "step": 145800
+ },
+ {
+ "epoch": 0.7524984392816052,
+ "grad_norm": 24945.2109375,
+ "learning_rate": 2.987540929508354e-05,
+ "loss": 0.409,
+ "step": 145850
+ },
+ {
+ "epoch": 0.752756409264218,
+ "grad_norm": 26535.109375,
+ "learning_rate": 2.9841974110874037e-05,
+ "loss": 0.4132,
+ "step": 145900
+ },
+ {
+ "epoch": 0.7530143792468308,
+ "grad_norm": 21566.904296875,
+ "learning_rate": 2.980854968662843e-05,
+ "loss": 0.4073,
+ "step": 145950
+ },
+ {
+ "epoch": 0.7532723492294436,
+ "grad_norm": 22965.73828125,
+ "learning_rate": 2.9775136040188007e-05,
+ "loss": 0.4124,
+ "step": 146000
+ },
+ {
+ "epoch": 0.7535303192120565,
+ "grad_norm": 25307.123046875,
+ "learning_rate": 2.974173318938833e-05,
+ "loss": 0.4134,
+ "step": 146050
+ },
+ {
+ "epoch": 0.7537882891946693,
+ "grad_norm": 22280.431640625,
+ "learning_rate": 2.9708341152059226e-05,
+ "loss": 0.4085,
+ "step": 146100
+ },
+ {
+ "epoch": 0.7540462591772822,
+ "grad_norm": 25268.705078125,
+ "learning_rate": 2.9674959946024662e-05,
+ "loss": 0.4031,
+ "step": 146150
+ },
+ {
+ "epoch": 0.7543042291598949,
+ "grad_norm": 20014.28125,
+ "learning_rate": 2.9641589589102918e-05,
+ "loss": 0.4093,
+ "step": 146200
+ },
+ {
+ "epoch": 0.7545621991425078,
+ "grad_norm": 28430.544921875,
+ "learning_rate": 2.9608230099106427e-05,
+ "loss": 0.4112,
+ "step": 146250
+ },
+ {
+ "epoch": 0.7548201691251206,
+ "grad_norm": 21031.328125,
+ "learning_rate": 2.9574881493841867e-05,
+ "loss": 0.4084,
+ "step": 146300
+ },
+ {
+ "epoch": 0.7550781391077335,
+ "grad_norm": 24118.916015625,
+ "learning_rate": 2.9541543791110032e-05,
+ "loss": 0.4152,
+ "step": 146350
+ },
+ {
+ "epoch": 0.7553361090903462,
+ "grad_norm": 20663.740234375,
+ "learning_rate": 2.950821700870598e-05,
+ "loss": 0.409,
+ "step": 146400
+ },
+ {
+ "epoch": 0.7555940790729591,
+ "grad_norm": 23081.328125,
+ "learning_rate": 2.9474901164418877e-05,
+ "loss": 0.4089,
+ "step": 146450
+ },
+ {
+ "epoch": 0.7558520490555719,
+ "grad_norm": 24167.80859375,
+ "learning_rate": 2.9441596276032085e-05,
+ "loss": 0.4096,
+ "step": 146500
+ },
+ {
+ "epoch": 0.7561100190381848,
+ "grad_norm": 24959.595703125,
+ "learning_rate": 2.940830236132313e-05,
+ "loss": 0.4109,
+ "step": 146550
+ },
+ {
+ "epoch": 0.7563679890207975,
+ "grad_norm": 22731.36328125,
+ "learning_rate": 2.9375019438063622e-05,
+ "loss": 0.41,
+ "step": 146600
+ },
+ {
+ "epoch": 0.7566259590034103,
+ "grad_norm": 24127.41015625,
+ "learning_rate": 2.9341747524019368e-05,
+ "loss": 0.4078,
+ "step": 146650
+ },
+ {
+ "epoch": 0.7568839289860232,
+ "grad_norm": 27476.791015625,
+ "learning_rate": 2.9308486636950254e-05,
+ "loss": 0.4063,
+ "step": 146700
+ },
+ {
+ "epoch": 0.757141898968636,
+ "grad_norm": 24664.61328125,
+ "learning_rate": 2.9275236794610328e-05,
+ "loss": 0.4086,
+ "step": 146750
+ },
+ {
+ "epoch": 0.7573998689512488,
+ "grad_norm": 23883.0625,
+ "learning_rate": 2.9241998014747664e-05,
+ "loss": 0.4046,
+ "step": 146800
+ },
+ {
+ "epoch": 0.7576578389338616,
+ "grad_norm": 23431.509765625,
+ "learning_rate": 2.9208770315104515e-05,
+ "loss": 0.4054,
+ "step": 146850
+ },
+ {
+ "epoch": 0.7579158089164745,
+ "grad_norm": 25177.9453125,
+ "learning_rate": 2.9175553713417176e-05,
+ "loss": 0.4094,
+ "step": 146900
+ },
+ {
+ "epoch": 0.7581737788990873,
+ "grad_norm": 22862.201171875,
+ "learning_rate": 2.9142348227416e-05,
+ "loss": 0.4073,
+ "step": 146950
+ },
+ {
+ "epoch": 0.7584317488817002,
+ "grad_norm": 21731.240234375,
+ "learning_rate": 2.9109153874825478e-05,
+ "loss": 0.4081,
+ "step": 147000
+ },
+ {
+ "epoch": 0.7586897188643129,
+ "grad_norm": 24952.87109375,
+ "learning_rate": 2.9075970673364083e-05,
+ "loss": 0.4092,
+ "step": 147050
+ },
+ {
+ "epoch": 0.7589476888469258,
+ "grad_norm": 23138.029296875,
+ "learning_rate": 2.9042798640744385e-05,
+ "loss": 0.4051,
+ "step": 147100
+ },
+ {
+ "epoch": 0.7592056588295386,
+ "grad_norm": 21496.501953125,
+ "learning_rate": 2.900963779467295e-05,
+ "loss": 0.4096,
+ "step": 147150
+ },
+ {
+ "epoch": 0.7594636288121515,
+ "grad_norm": 22243.36328125,
+ "learning_rate": 2.8976488152850406e-05,
+ "loss": 0.3985,
+ "step": 147200
+ },
+ {
+ "epoch": 0.7597215987947642,
+ "grad_norm": 24515.029296875,
+ "learning_rate": 2.894334973297137e-05,
+ "loss": 0.4043,
+ "step": 147250
+ },
+ {
+ "epoch": 0.759979568777377,
+ "grad_norm": 23431.802734375,
+ "learning_rate": 2.8910222552724553e-05,
+ "loss": 0.4092,
+ "step": 147300
+ },
+ {
+ "epoch": 0.7602375387599899,
+ "grad_norm": 24192.44140625,
+ "learning_rate": 2.8877106629792515e-05,
+ "loss": 0.413,
+ "step": 147350
+ },
+ {
+ "epoch": 0.7604955087426027,
+ "grad_norm": 24239.015625,
+ "learning_rate": 2.884400198185196e-05,
+ "loss": 0.4064,
+ "step": 147400
+ },
+ {
+ "epoch": 0.7607534787252155,
+ "grad_norm": 22407.27734375,
+ "learning_rate": 2.881090862657348e-05,
+ "loss": 0.4086,
+ "step": 147450
+ },
+ {
+ "epoch": 0.7610114487078283,
+ "grad_norm": 24915.517578125,
+ "learning_rate": 2.877782658162166e-05,
+ "loss": 0.4067,
+ "step": 147500
+ },
+ {
+ "epoch": 0.7612694186904412,
+ "grad_norm": 23721.33984375,
+ "learning_rate": 2.8744755864655098e-05,
+ "loss": 0.4078,
+ "step": 147550
+ },
+ {
+ "epoch": 0.761527388673054,
+ "grad_norm": 23041.625,
+ "learning_rate": 2.8711696493326233e-05,
+ "loss": 0.4092,
+ "step": 147600
+ },
+ {
+ "epoch": 0.7617853586556669,
+ "grad_norm": 24021.81640625,
+ "learning_rate": 2.867864848528158e-05,
+ "loss": 0.4116,
+ "step": 147650
+ },
+ {
+ "epoch": 0.7620433286382796,
+ "grad_norm": 21309.7890625,
+ "learning_rate": 2.8645611858161502e-05,
+ "loss": 0.414,
+ "step": 147700
+ },
+ {
+ "epoch": 0.7623012986208925,
+ "grad_norm": 21959.544921875,
+ "learning_rate": 2.8612586629600307e-05,
+ "loss": 0.4113,
+ "step": 147750
+ },
+ {
+ "epoch": 0.7625592686035053,
+ "grad_norm": 22090.75,
+ "learning_rate": 2.857957281722623e-05,
+ "loss": 0.41,
+ "step": 147800
+ },
+ {
+ "epoch": 0.7628172385861182,
+ "grad_norm": 21273.6640625,
+ "learning_rate": 2.854657043866138e-05,
+ "loss": 0.4043,
+ "step": 147850
+ },
+ {
+ "epoch": 0.7630752085687309,
+ "grad_norm": 22781.33984375,
+ "learning_rate": 2.8513579511521825e-05,
+ "loss": 0.4009,
+ "step": 147900
+ },
+ {
+ "epoch": 0.7633331785513437,
+ "grad_norm": 24383.95703125,
+ "learning_rate": 2.8480600053417472e-05,
+ "loss": 0.4077,
+ "step": 147950
+ },
+ {
+ "epoch": 0.7635911485339566,
+ "grad_norm": 23988.673828125,
+ "learning_rate": 2.8447632081952104e-05,
+ "loss": 0.4048,
+ "step": 148000
+ },
+ {
+ "epoch": 0.7638491185165694,
+ "grad_norm": 24593.1484375,
+ "learning_rate": 2.8414675614723397e-05,
+ "loss": 0.4145,
+ "step": 148050
+ },
+ {
+ "epoch": 0.7641070884991822,
+ "grad_norm": 25818.216796875,
+ "learning_rate": 2.838173066932287e-05,
+ "loss": 0.408,
+ "step": 148100
+ },
+ {
+ "epoch": 0.764365058481795,
+ "grad_norm": 25780.1796875,
+ "learning_rate": 2.8348797263335886e-05,
+ "loss": 0.4109,
+ "step": 148150
+ },
+ {
+ "epoch": 0.7646230284644079,
+ "grad_norm": 22835.51171875,
+ "learning_rate": 2.8315875414341687e-05,
+ "loss": 0.4037,
+ "step": 148200
+ },
+ {
+ "epoch": 0.7648809984470207,
+ "grad_norm": 22711.501953125,
+ "learning_rate": 2.8282965139913296e-05,
+ "loss": 0.4117,
+ "step": 148250
+ },
+ {
+ "epoch": 0.7651389684296336,
+ "grad_norm": 22654.080078125,
+ "learning_rate": 2.825006645761758e-05,
+ "loss": 0.4094,
+ "step": 148300
+ },
+ {
+ "epoch": 0.7653969384122463,
+ "grad_norm": 23406.8671875,
+ "learning_rate": 2.821717938501526e-05,
+ "loss": 0.4096,
+ "step": 148350
+ },
+ {
+ "epoch": 0.7656549083948592,
+ "grad_norm": 23591.68359375,
+ "learning_rate": 2.8184303939660745e-05,
+ "loss": 0.4087,
+ "step": 148400
+ },
+ {
+ "epoch": 0.765912878377472,
+ "grad_norm": 21550.94140625,
+ "learning_rate": 2.815144013910237e-05,
+ "loss": 0.4046,
+ "step": 148450
+ },
+ {
+ "epoch": 0.7661708483600849,
+ "grad_norm": 23503.48046875,
+ "learning_rate": 2.8118588000882177e-05,
+ "loss": 0.4116,
+ "step": 148500
+ },
+ {
+ "epoch": 0.7664288183426976,
+ "grad_norm": 25247.244140625,
+ "learning_rate": 2.8085747542536e-05,
+ "loss": 0.4023,
+ "step": 148550
+ },
+ {
+ "epoch": 0.7666867883253105,
+ "grad_norm": 23665.91796875,
+ "learning_rate": 2.805291878159344e-05,
+ "loss": 0.4117,
+ "step": 148600
+ },
+ {
+ "epoch": 0.7669447583079233,
+ "grad_norm": 22785.59765625,
+ "learning_rate": 2.8020101735577837e-05,
+ "loss": 0.4084,
+ "step": 148650
+ },
+ {
+ "epoch": 0.7672027282905362,
+ "grad_norm": 20447.72265625,
+ "learning_rate": 2.7987296422006327e-05,
+ "loss": 0.4091,
+ "step": 148700
+ },
+ {
+ "epoch": 0.7674606982731489,
+ "grad_norm": 24965.869140625,
+ "learning_rate": 2.795450285838974e-05,
+ "loss": 0.4067,
+ "step": 148750
+ },
+ {
+ "epoch": 0.7677186682557617,
+ "grad_norm": 24323.09765625,
+ "learning_rate": 2.7921721062232637e-05,
+ "loss": 0.4037,
+ "step": 148800
+ },
+ {
+ "epoch": 0.7679766382383746,
+ "grad_norm": 23956.177734375,
+ "learning_rate": 2.7888951051033314e-05,
+ "loss": 0.4079,
+ "step": 148850
+ },
+ {
+ "epoch": 0.7682346082209874,
+ "grad_norm": 24222.4140625,
+ "learning_rate": 2.7856192842283756e-05,
+ "loss": 0.4112,
+ "step": 148900
+ },
+ {
+ "epoch": 0.7684925782036002,
+ "grad_norm": 24444.046875,
+ "learning_rate": 2.782344645346966e-05,
+ "loss": 0.4148,
+ "step": 148950
+ },
+ {
+ "epoch": 0.768750548186213,
+ "grad_norm": 23160.578125,
+ "learning_rate": 2.779071190207046e-05,
+ "loss": 0.4063,
+ "step": 149000
+ },
+ {
+ "epoch": 0.7690085181688259,
+ "grad_norm": 25806.732421875,
+ "learning_rate": 2.7757989205559142e-05,
+ "loss": 0.4112,
+ "step": 149050
+ },
+ {
+ "epoch": 0.7692664881514387,
+ "grad_norm": 21389.734375,
+ "learning_rate": 2.7725278381402524e-05,
+ "loss": 0.4104,
+ "step": 149100
+ },
+ {
+ "epoch": 0.7695244581340516,
+ "grad_norm": 23550.23828125,
+ "learning_rate": 2.769257944706098e-05,
+ "loss": 0.4121,
+ "step": 149150
+ },
+ {
+ "epoch": 0.7697824281166643,
+ "grad_norm": 21442.373046875,
+ "learning_rate": 2.765989241998854e-05,
+ "loss": 0.4087,
+ "step": 149200
+ },
+ {
+ "epoch": 0.7700403980992772,
+ "grad_norm": 23958.978515625,
+ "learning_rate": 2.7627217317632993e-05,
+ "loss": 0.4136,
+ "step": 149250
+ },
+ {
+ "epoch": 0.77029836808189,
+ "grad_norm": 22143.07421875,
+ "learning_rate": 2.759455415743556e-05,
+ "loss": 0.41,
+ "step": 149300
+ },
+ {
+ "epoch": 0.7705563380645029,
+ "grad_norm": 22873.86328125,
+ "learning_rate": 2.7561902956831294e-05,
+ "loss": 0.4094,
+ "step": 149350
+ },
+ {
+ "epoch": 0.7708143080471156,
+ "grad_norm": 22419.3046875,
+ "learning_rate": 2.7529263733248734e-05,
+ "loss": 0.4133,
+ "step": 149400
+ },
+ {
+ "epoch": 0.7710722780297284,
+ "grad_norm": 22167.474609375,
+ "learning_rate": 2.7496636504110075e-05,
+ "loss": 0.4181,
+ "step": 149450
+ },
+ {
+ "epoch": 0.7713302480123413,
+ "grad_norm": 25449.96875,
+ "learning_rate": 2.74640212868311e-05,
+ "loss": 0.412,
+ "step": 149500
+ },
+ {
+ "epoch": 0.7715882179949541,
+ "grad_norm": 22876.767578125,
+ "learning_rate": 2.7431418098821154e-05,
+ "loss": 0.4087,
+ "step": 149550
+ },
+ {
+ "epoch": 0.7718461879775669,
+ "grad_norm": 25600.65625,
+ "learning_rate": 2.7398826957483235e-05,
+ "loss": 0.4133,
+ "step": 149600
+ },
+ {
+ "epoch": 0.7721041579601797,
+ "grad_norm": 21764.0,
+ "learning_rate": 2.7366247880213834e-05,
+ "loss": 0.4073,
+ "step": 149650
+ },
+ {
+ "epoch": 0.7723621279427926,
+ "grad_norm": 21836.0625,
+ "learning_rate": 2.7333680884403046e-05,
+ "loss": 0.4165,
+ "step": 149700
+ },
+ {
+ "epoch": 0.7726200979254054,
+ "grad_norm": 22049.466796875,
+ "learning_rate": 2.7301125987434496e-05,
+ "loss": 0.4104,
+ "step": 149750
+ },
+ {
+ "epoch": 0.7728780679080183,
+ "grad_norm": 25398.28515625,
+ "learning_rate": 2.7268583206685348e-05,
+ "loss": 0.4036,
+ "step": 149800
+ },
+ {
+ "epoch": 0.773136037890631,
+ "grad_norm": 22303.654296875,
+ "learning_rate": 2.72360525595263e-05,
+ "loss": 0.4077,
+ "step": 149850
+ },
+ {
+ "epoch": 0.7733940078732439,
+ "grad_norm": 24734.65234375,
+ "learning_rate": 2.7203534063321633e-05,
+ "loss": 0.409,
+ "step": 149900
+ },
+ {
+ "epoch": 0.7736519778558567,
+ "grad_norm": 22068.283203125,
+ "learning_rate": 2.7171027735429023e-05,
+ "loss": 0.4148,
+ "step": 149950
+ },
+ {
+ "epoch": 0.7739099478384696,
+ "grad_norm": 23250.4921875,
+ "learning_rate": 2.7138533593199766e-05,
+ "loss": 0.4062,
+ "step": 150000
+ },
+ {
+ "epoch": 0.7739099478384696,
+ "eval_loss": 0.3953176736831665,
+ "eval_runtime": 3196.6561,
+ "eval_samples_per_second": 970.114,
+ "eval_steps_per_second": 1.895,
+ "step": 150000
+ },
+ {
+ "epoch": 0.7741679178210823,
+ "grad_norm": 26452.75390625,
+ "learning_rate": 2.710605165397859e-05,
+ "loss": 0.4098,
+ "step": 150050
+ },
+ {
+ "epoch": 0.7744258878036951,
+ "grad_norm": 23934.783203125,
+ "learning_rate": 2.707358193510371e-05,
+ "loss": 0.4113,
+ "step": 150100
+ },
+ {
+ "epoch": 0.774683857786308,
+ "grad_norm": 22443.591796875,
+ "learning_rate": 2.7041124453906884e-05,
+ "loss": 0.4119,
+ "step": 150150
+ },
+ {
+ "epoch": 0.7749418277689208,
+ "grad_norm": 23333.529296875,
+ "learning_rate": 2.7008679227713214e-05,
+ "loss": 0.4029,
+ "step": 150200
+ },
+ {
+ "epoch": 0.7751997977515336,
+ "grad_norm": 22431.576171875,
+ "learning_rate": 2.6976246273841388e-05,
+ "loss": 0.4045,
+ "step": 150250
+ },
+ {
+ "epoch": 0.7754577677341464,
+ "grad_norm": 26959.68359375,
+ "learning_rate": 2.694382560960348e-05,
+ "loss": 0.4072,
+ "step": 150300
+ },
+ {
+ "epoch": 0.7757157377167593,
+ "grad_norm": 21064.66015625,
+ "learning_rate": 2.6911417252304994e-05,
+ "loss": 0.411,
+ "step": 150350
+ },
+ {
+ "epoch": 0.7759737076993721,
+ "grad_norm": 23242.583984375,
+ "learning_rate": 2.6879021219244906e-05,
+ "loss": 0.4075,
+ "step": 150400
+ },
+ {
+ "epoch": 0.776231677681985,
+ "grad_norm": 24738.037109375,
+ "learning_rate": 2.6846637527715546e-05,
+ "loss": 0.4069,
+ "step": 150450
+ },
+ {
+ "epoch": 0.7764896476645977,
+ "grad_norm": 23944.759765625,
+ "learning_rate": 2.681426619500277e-05,
+ "loss": 0.403,
+ "step": 150500
+ },
+ {
+ "epoch": 0.7767476176472106,
+ "grad_norm": 22064.611328125,
+ "learning_rate": 2.678190723838572e-05,
+ "loss": 0.4045,
+ "step": 150550
+ },
+ {
+ "epoch": 0.7770055876298234,
+ "grad_norm": 24025.298828125,
+ "learning_rate": 2.6749560675137002e-05,
+ "loss": 0.4087,
+ "step": 150600
+ },
+ {
+ "epoch": 0.7772635576124363,
+ "grad_norm": 20863.119140625,
+ "learning_rate": 2.6717226522522553e-05,
+ "loss": 0.4087,
+ "step": 150650
+ },
+ {
+ "epoch": 0.777521527595049,
+ "grad_norm": 24537.642578125,
+ "learning_rate": 2.668490479780179e-05,
+ "loss": 0.4127,
+ "step": 150700
+ },
+ {
+ "epoch": 0.7777794975776618,
+ "grad_norm": 24400.193359375,
+ "learning_rate": 2.665259551822733e-05,
+ "loss": 0.4066,
+ "step": 150750
+ },
+ {
+ "epoch": 0.7780374675602747,
+ "grad_norm": 25251.81640625,
+ "learning_rate": 2.6620298701045322e-05,
+ "loss": 0.4111,
+ "step": 150800
+ },
+ {
+ "epoch": 0.7782954375428875,
+ "grad_norm": 23078.0,
+ "learning_rate": 2.658801436349511e-05,
+ "loss": 0.4109,
+ "step": 150850
+ },
+ {
+ "epoch": 0.7785534075255003,
+ "grad_norm": 20437.556640625,
+ "learning_rate": 2.655574252280949e-05,
+ "loss": 0.4096,
+ "step": 150900
+ },
+ {
+ "epoch": 0.7788113775081131,
+ "grad_norm": 24091.796875,
+ "learning_rate": 2.652348319621457e-05,
+ "loss": 0.4097,
+ "step": 150950
+ },
+ {
+ "epoch": 0.779069347490726,
+ "grad_norm": 22893.6640625,
+ "learning_rate": 2.6491236400929686e-05,
+ "loss": 0.4093,
+ "step": 151000
+ },
+ {
+ "epoch": 0.7793273174733388,
+ "grad_norm": 22871.80859375,
+ "learning_rate": 2.645900215416761e-05,
+ "loss": 0.407,
+ "step": 151050
+ },
+ {
+ "epoch": 0.7795852874559517,
+ "grad_norm": 21766.30078125,
+ "learning_rate": 2.642678047313435e-05,
+ "loss": 0.4071,
+ "step": 151100
+ },
+ {
+ "epoch": 0.7798432574385644,
+ "grad_norm": 24945.544921875,
+ "learning_rate": 2.639457137502919e-05,
+ "loss": 0.4073,
+ "step": 151150
+ },
+ {
+ "epoch": 0.7801012274211773,
+ "grad_norm": 22374.009765625,
+ "learning_rate": 2.636237487704475e-05,
+ "loss": 0.409,
+ "step": 151200
+ },
+ {
+ "epoch": 0.7803591974037901,
+ "grad_norm": 23499.08984375,
+ "learning_rate": 2.6330190996366875e-05,
+ "loss": 0.4087,
+ "step": 151250
+ },
+ {
+ "epoch": 0.780617167386403,
+ "grad_norm": 24672.017578125,
+ "learning_rate": 2.629801975017469e-05,
+ "loss": 0.4075,
+ "step": 151300
+ },
+ {
+ "epoch": 0.7808751373690157,
+ "grad_norm": 23105.05078125,
+ "learning_rate": 2.6265861155640626e-05,
+ "loss": 0.4031,
+ "step": 151350
+ },
+ {
+ "epoch": 0.7811331073516286,
+ "grad_norm": 23226.171875,
+ "learning_rate": 2.6233715229930282e-05,
+ "loss": 0.4137,
+ "step": 151400
+ },
+ {
+ "epoch": 0.7813910773342414,
+ "grad_norm": 24494.732421875,
+ "learning_rate": 2.620158199020255e-05,
+ "loss": 0.4089,
+ "step": 151450
+ },
+ {
+ "epoch": 0.7816490473168543,
+ "grad_norm": 24024.236328125,
+ "learning_rate": 2.616946145360952e-05,
+ "loss": 0.4084,
+ "step": 151500
+ },
+ {
+ "epoch": 0.781907017299467,
+ "grad_norm": 21957.2265625,
+ "learning_rate": 2.613735363729649e-05,
+ "loss": 0.4079,
+ "step": 151550
+ },
+ {
+ "epoch": 0.7821649872820798,
+ "grad_norm": 22637.291015625,
+ "learning_rate": 2.6105258558402056e-05,
+ "loss": 0.4093,
+ "step": 151600
+ },
+ {
+ "epoch": 0.7824229572646927,
+ "grad_norm": 27436.56640625,
+ "learning_rate": 2.607317623405787e-05,
+ "loss": 0.4054,
+ "step": 151650
+ },
+ {
+ "epoch": 0.7826809272473055,
+ "grad_norm": 21909.509765625,
+ "learning_rate": 2.6041106681388922e-05,
+ "loss": 0.4052,
+ "step": 151700
+ },
+ {
+ "epoch": 0.7829388972299183,
+ "grad_norm": 22887.494140625,
+ "learning_rate": 2.6009049917513283e-05,
+ "loss": 0.408,
+ "step": 151750
+ },
+ {
+ "epoch": 0.7831968672125311,
+ "grad_norm": 20771.53125,
+ "learning_rate": 2.5977005959542222e-05,
+ "loss": 0.4052,
+ "step": 151800
+ },
+ {
+ "epoch": 0.783454837195144,
+ "grad_norm": 22012.322265625,
+ "learning_rate": 2.5944974824580244e-05,
+ "loss": 0.4053,
+ "step": 151850
+ },
+ {
+ "epoch": 0.7837128071777568,
+ "grad_norm": 25365.822265625,
+ "learning_rate": 2.5912956529724865e-05,
+ "loss": 0.4141,
+ "step": 151900
+ },
+ {
+ "epoch": 0.7839707771603697,
+ "grad_norm": 23211.658203125,
+ "learning_rate": 2.5880951092066885e-05,
+ "loss": 0.4094,
+ "step": 151950
+ },
+ {
+ "epoch": 0.7842287471429824,
+ "grad_norm": 21514.79296875,
+ "learning_rate": 2.584895852869018e-05,
+ "loss": 0.4056,
+ "step": 152000
+ },
+ {
+ "epoch": 0.7844867171255953,
+ "grad_norm": 23275.76953125,
+ "learning_rate": 2.581697885667176e-05,
+ "loss": 0.4076,
+ "step": 152050
+ },
+ {
+ "epoch": 0.7847446871082081,
+ "grad_norm": 24080.478515625,
+ "learning_rate": 2.578501209308174e-05,
+ "loss": 0.409,
+ "step": 152100
+ },
+ {
+ "epoch": 0.785002657090821,
+ "grad_norm": 23384.275390625,
+ "learning_rate": 2.5753058254983376e-05,
+ "loss": 0.4063,
+ "step": 152150
+ },
+ {
+ "epoch": 0.7852606270734337,
+ "grad_norm": 22736.451171875,
+ "learning_rate": 2.572111735943298e-05,
+ "loss": 0.4054,
+ "step": 152200
+ },
+ {
+ "epoch": 0.7855185970560465,
+ "grad_norm": 24730.462890625,
+ "learning_rate": 2.568918942348002e-05,
+ "loss": 0.4074,
+ "step": 152250
+ },
+ {
+ "epoch": 0.7857765670386594,
+ "grad_norm": 23020.759765625,
+ "learning_rate": 2.5657274464166996e-05,
+ "loss": 0.4143,
+ "step": 152300
+ },
+ {
+ "epoch": 0.7860345370212722,
+ "grad_norm": 22263.357421875,
+ "learning_rate": 2.56253724985295e-05,
+ "loss": 0.4075,
+ "step": 152350
+ },
+ {
+ "epoch": 0.786292507003885,
+ "grad_norm": 23515.408203125,
+ "learning_rate": 2.5593483543596165e-05,
+ "loss": 0.4055,
+ "step": 152400
+ },
+ {
+ "epoch": 0.7865504769864978,
+ "grad_norm": 21960.447265625,
+ "learning_rate": 2.55616076163887e-05,
+ "loss": 0.407,
+ "step": 152450
+ },
+ {
+ "epoch": 0.7868084469691107,
+ "grad_norm": 26880.94140625,
+ "learning_rate": 2.55297447339219e-05,
+ "loss": 0.4029,
+ "step": 152500
+ },
+ {
+ "epoch": 0.7870664169517235,
+ "grad_norm": 22276.259765625,
+ "learning_rate": 2.5497894913203492e-05,
+ "loss": 0.4038,
+ "step": 152550
+ },
+ {
+ "epoch": 0.7873243869343364,
+ "grad_norm": 22566.541015625,
+ "learning_rate": 2.5466058171234336e-05,
+ "loss": 0.4055,
+ "step": 152600
+ },
+ {
+ "epoch": 0.7875823569169491,
+ "grad_norm": 24620.486328125,
+ "learning_rate": 2.543423452500826e-05,
+ "loss": 0.4031,
+ "step": 152650
+ },
+ {
+ "epoch": 0.787840326899562,
+ "grad_norm": 24162.99609375,
+ "learning_rate": 2.540242399151208e-05,
+ "loss": 0.4075,
+ "step": 152700
+ },
+ {
+ "epoch": 0.7880982968821748,
+ "grad_norm": 25309.958984375,
+ "learning_rate": 2.537062658772572e-05,
+ "loss": 0.4052,
+ "step": 152750
+ },
+ {
+ "epoch": 0.7883562668647877,
+ "grad_norm": 22024.390625,
+ "learning_rate": 2.533884233062192e-05,
+ "loss": 0.4036,
+ "step": 152800
+ },
+ {
+ "epoch": 0.7886142368474004,
+ "grad_norm": 22356.041015625,
+ "learning_rate": 2.530707123716657e-05,
+ "loss": 0.4065,
+ "step": 152850
+ },
+ {
+ "epoch": 0.7888722068300132,
+ "grad_norm": 22957.642578125,
+ "learning_rate": 2.527531332431844e-05,
+ "loss": 0.403,
+ "step": 152900
+ },
+ {
+ "epoch": 0.7891301768126261,
+ "grad_norm": 22161.298828125,
+ "learning_rate": 2.52435686090293e-05,
+ "loss": 0.4046,
+ "step": 152950
+ },
+ {
+ "epoch": 0.7893881467952389,
+ "grad_norm": 22849.720703125,
+ "learning_rate": 2.5211837108243847e-05,
+ "loss": 0.4045,
+ "step": 153000
+ },
+ {
+ "epoch": 0.7896461167778517,
+ "grad_norm": 25891.248046875,
+ "learning_rate": 2.5180118838899756e-05,
+ "loss": 0.4083,
+ "step": 153050
+ },
+ {
+ "epoch": 0.7899040867604645,
+ "grad_norm": 23150.634765625,
+ "learning_rate": 2.5148413817927598e-05,
+ "loss": 0.4104,
+ "step": 153100
+ },
+ {
+ "epoch": 0.7901620567430774,
+ "grad_norm": 23457.515625,
+ "learning_rate": 2.511672206225094e-05,
+ "loss": 0.4101,
+ "step": 153150
+ },
+ {
+ "epoch": 0.7904200267256902,
+ "grad_norm": 21316.8828125,
+ "learning_rate": 2.508504358878621e-05,
+ "loss": 0.4091,
+ "step": 153200
+ },
+ {
+ "epoch": 0.7906779967083031,
+ "grad_norm": 25747.87109375,
+ "learning_rate": 2.5053378414442748e-05,
+ "loss": 0.4131,
+ "step": 153250
+ },
+ {
+ "epoch": 0.7909359666909158,
+ "grad_norm": 21499.56640625,
+ "learning_rate": 2.502172655612286e-05,
+ "loss": 0.4028,
+ "step": 153300
+ },
+ {
+ "epoch": 0.7911939366735287,
+ "grad_norm": 22949.970703125,
+ "learning_rate": 2.499008803072162e-05,
+ "loss": 0.4078,
+ "step": 153350
+ },
+ {
+ "epoch": 0.7914519066561415,
+ "grad_norm": 26207.181640625,
+ "learning_rate": 2.495846285512714e-05,
+ "loss": 0.4064,
+ "step": 153400
+ },
+ {
+ "epoch": 0.7917098766387544,
+ "grad_norm": 25037.625,
+ "learning_rate": 2.4926851046220246e-05,
+ "loss": 0.4067,
+ "step": 153450
+ },
+ {
+ "epoch": 0.7919678466213671,
+ "grad_norm": 24114.482421875,
+ "learning_rate": 2.4895252620874775e-05,
+ "loss": 0.4123,
+ "step": 153500
+ },
+ {
+ "epoch": 0.79222581660398,
+ "grad_norm": 24953.568359375,
+ "learning_rate": 2.4863667595957325e-05,
+ "loss": 0.4083,
+ "step": 153550
+ },
+ {
+ "epoch": 0.7924837865865928,
+ "grad_norm": 24928.2265625,
+ "learning_rate": 2.483209598832736e-05,
+ "loss": 0.4066,
+ "step": 153600
+ },
+ {
+ "epoch": 0.7927417565692056,
+ "grad_norm": 24045.166015625,
+ "learning_rate": 2.4800537814837227e-05,
+ "loss": 0.4056,
+ "step": 153650
+ },
+ {
+ "epoch": 0.7929997265518184,
+ "grad_norm": 24591.826171875,
+ "learning_rate": 2.476899309233205e-05,
+ "loss": 0.4094,
+ "step": 153700
+ },
+ {
+ "epoch": 0.7932576965344312,
+ "grad_norm": 23336.810546875,
+ "learning_rate": 2.4737461837649782e-05,
+ "loss": 0.41,
+ "step": 153750
+ },
+ {
+ "epoch": 0.7935156665170441,
+ "grad_norm": 23454.171875,
+ "learning_rate": 2.4705944067621216e-05,
+ "loss": 0.4068,
+ "step": 153800
+ },
+ {
+ "epoch": 0.7937736364996569,
+ "grad_norm": 25322.201171875,
+ "learning_rate": 2.467443979906991e-05,
+ "loss": 0.4097,
+ "step": 153850
+ },
+ {
+ "epoch": 0.7940316064822697,
+ "grad_norm": 24731.580078125,
+ "learning_rate": 2.464294904881222e-05,
+ "loss": 0.4028,
+ "step": 153900
+ },
+ {
+ "epoch": 0.7942895764648825,
+ "grad_norm": 21753.568359375,
+ "learning_rate": 2.4611471833657356e-05,
+ "loss": 0.4148,
+ "step": 153950
+ },
+ {
+ "epoch": 0.7945475464474954,
+ "grad_norm": 26548.966796875,
+ "learning_rate": 2.458000817040717e-05,
+ "loss": 0.4074,
+ "step": 154000
+ },
+ {
+ "epoch": 0.7948055164301082,
+ "grad_norm": 21149.470703125,
+ "learning_rate": 2.4548558075856414e-05,
+ "loss": 0.408,
+ "step": 154050
+ },
+ {
+ "epoch": 0.7950634864127211,
+ "grad_norm": 25742.859375,
+ "learning_rate": 2.4517121566792517e-05,
+ "loss": 0.405,
+ "step": 154100
+ },
+ {
+ "epoch": 0.7953214563953338,
+ "grad_norm": 20954.91796875,
+ "learning_rate": 2.4485698659995658e-05,
+ "loss": 0.3975,
+ "step": 154150
+ },
+ {
+ "epoch": 0.7955794263779467,
+ "grad_norm": 23551.646484375,
+ "learning_rate": 2.445428937223884e-05,
+ "loss": 0.4059,
+ "step": 154200
+ },
+ {
+ "epoch": 0.7958373963605595,
+ "grad_norm": 25214.693359375,
+ "learning_rate": 2.4422893720287654e-05,
+ "loss": 0.4008,
+ "step": 154250
+ },
+ {
+ "epoch": 0.7960953663431724,
+ "grad_norm": 25346.916015625,
+ "learning_rate": 2.4391511720900545e-05,
+ "loss": 0.4035,
+ "step": 154300
+ },
+ {
+ "epoch": 0.7963533363257851,
+ "grad_norm": 21641.23828125,
+ "learning_rate": 2.43601433908286e-05,
+ "loss": 0.4069,
+ "step": 154350
+ },
+ {
+ "epoch": 0.7966113063083979,
+ "grad_norm": 22860.998046875,
+ "learning_rate": 2.4328788746815628e-05,
+ "loss": 0.4022,
+ "step": 154400
+ },
+ {
+ "epoch": 0.7968692762910108,
+ "grad_norm": 21989.96484375,
+ "learning_rate": 2.429744780559813e-05,
+ "loss": 0.4055,
+ "step": 154450
+ },
+ {
+ "epoch": 0.7971272462736236,
+ "grad_norm": 24413.74609375,
+ "learning_rate": 2.4266120583905272e-05,
+ "loss": 0.412,
+ "step": 154500
+ },
+ {
+ "epoch": 0.7973852162562364,
+ "grad_norm": 24805.859375,
+ "learning_rate": 2.4234807098458957e-05,
+ "loss": 0.41,
+ "step": 154550
+ },
+ {
+ "epoch": 0.7976431862388492,
+ "grad_norm": 23658.326171875,
+ "learning_rate": 2.42035073659737e-05,
+ "loss": 0.41,
+ "step": 154600
+ },
+ {
+ "epoch": 0.7979011562214621,
+ "grad_norm": 25225.228515625,
+ "learning_rate": 2.417222140315669e-05,
+ "loss": 0.4069,
+ "step": 154650
+ },
+ {
+ "epoch": 0.7981591262040749,
+ "grad_norm": 23417.3828125,
+ "learning_rate": 2.414094922670777e-05,
+ "loss": 0.4102,
+ "step": 154700
+ },
+ {
+ "epoch": 0.7984170961866878,
+ "grad_norm": 25014.5078125,
+ "learning_rate": 2.4109690853319422e-05,
+ "loss": 0.412,
+ "step": 154750
+ },
+ {
+ "epoch": 0.7986750661693005,
+ "grad_norm": 25523.3125,
+ "learning_rate": 2.407844629967674e-05,
+ "loss": 0.4102,
+ "step": 154800
+ },
+ {
+ "epoch": 0.7989330361519134,
+ "grad_norm": 23173.44921875,
+ "learning_rate": 2.404721558245752e-05,
+ "loss": 0.407,
+ "step": 154850
+ },
+ {
+ "epoch": 0.7991910061345262,
+ "grad_norm": 24673.5078125,
+ "learning_rate": 2.401599871833204e-05,
+ "loss": 0.4054,
+ "step": 154900
+ },
+ {
+ "epoch": 0.799448976117139,
+ "grad_norm": 24709.765625,
+ "learning_rate": 2.398479572396331e-05,
+ "loss": 0.4097,
+ "step": 154950
+ },
+ {
+ "epoch": 0.7997069460997518,
+ "grad_norm": 22404.29296875,
+ "learning_rate": 2.395360661600687e-05,
+ "loss": 0.4072,
+ "step": 155000
+ },
+ {
+ "epoch": 0.7997069460997518,
+ "eval_loss": 0.39372530579566956,
+ "eval_runtime": 3195.8879,
+ "eval_samples_per_second": 970.347,
+ "eval_steps_per_second": 1.895,
+ "step": 155000
+ },
+ {
+ "epoch": 0.7999649160823646,
+ "grad_norm": 24004.09375,
+ "learning_rate": 2.3922431411110834e-05,
+ "loss": 0.4016,
+ "step": 155050
+ },
+ {
+ "epoch": 0.8002228860649775,
+ "grad_norm": 25013.6484375,
+ "learning_rate": 2.3891270125915992e-05,
+ "loss": 0.4068,
+ "step": 155100
+ },
+ {
+ "epoch": 0.8004808560475903,
+ "grad_norm": 23532.982421875,
+ "learning_rate": 2.3860122777055553e-05,
+ "loss": 0.4036,
+ "step": 155150
+ },
+ {
+ "epoch": 0.8007388260302031,
+ "grad_norm": 27413.044921875,
+ "learning_rate": 2.3828989381155426e-05,
+ "loss": 0.4098,
+ "step": 155200
+ },
+ {
+ "epoch": 0.8009967960128159,
+ "grad_norm": 25821.794921875,
+ "learning_rate": 2.379786995483399e-05,
+ "loss": 0.4076,
+ "step": 155250
+ },
+ {
+ "epoch": 0.8012547659954288,
+ "grad_norm": 23864.154296875,
+ "learning_rate": 2.37667645147022e-05,
+ "loss": 0.4082,
+ "step": 155300
+ },
+ {
+ "epoch": 0.8015127359780416,
+ "grad_norm": 22892.451171875,
+ "learning_rate": 2.3735673077363534e-05,
+ "loss": 0.4116,
+ "step": 155350
+ },
+ {
+ "epoch": 0.8017707059606545,
+ "grad_norm": 24638.51953125,
+ "learning_rate": 2.3704595659413987e-05,
+ "loss": 0.4015,
+ "step": 155400
+ },
+ {
+ "epoch": 0.8020286759432672,
+ "grad_norm": 23007.734375,
+ "learning_rate": 2.3673532277442112e-05,
+ "loss": 0.4075,
+ "step": 155450
+ },
+ {
+ "epoch": 0.8022866459258801,
+ "grad_norm": 25629.17578125,
+ "learning_rate": 2.364248294802892e-05,
+ "loss": 0.4031,
+ "step": 155500
+ },
+ {
+ "epoch": 0.8025446159084929,
+ "grad_norm": 23949.939453125,
+ "learning_rate": 2.3611447687747955e-05,
+ "loss": 0.4091,
+ "step": 155550
+ },
+ {
+ "epoch": 0.8028025858911058,
+ "grad_norm": 23120.3515625,
+ "learning_rate": 2.3580426513165228e-05,
+ "loss": 0.4106,
+ "step": 155600
+ },
+ {
+ "epoch": 0.8030605558737185,
+ "grad_norm": 26965.955078125,
+ "learning_rate": 2.3549419440839236e-05,
+ "loss": 0.4054,
+ "step": 155650
+ },
+ {
+ "epoch": 0.8033185258563313,
+ "grad_norm": 23370.33984375,
+ "learning_rate": 2.3518426487320948e-05,
+ "loss": 0.407,
+ "step": 155700
+ },
+ {
+ "epoch": 0.8035764958389442,
+ "grad_norm": 22571.12890625,
+ "learning_rate": 2.3487447669153833e-05,
+ "loss": 0.4118,
+ "step": 155750
+ },
+ {
+ "epoch": 0.803834465821557,
+ "grad_norm": 24092.56640625,
+ "learning_rate": 2.3456483002873768e-05,
+ "loss": 0.4053,
+ "step": 155800
+ },
+ {
+ "epoch": 0.8040924358041698,
+ "grad_norm": 24549.140625,
+ "learning_rate": 2.3425532505009072e-05,
+ "loss": 0.405,
+ "step": 155850
+ },
+ {
+ "epoch": 0.8043504057867826,
+ "grad_norm": 23510.904296875,
+ "learning_rate": 2.3394596192080574e-05,
+ "loss": 0.4049,
+ "step": 155900
+ },
+ {
+ "epoch": 0.8046083757693955,
+ "grad_norm": 23147.369140625,
+ "learning_rate": 2.3363674080601416e-05,
+ "loss": 0.4032,
+ "step": 155950
+ },
+ {
+ "epoch": 0.8048663457520083,
+ "grad_norm": 21877.10546875,
+ "learning_rate": 2.3332766187077264e-05,
+ "loss": 0.4006,
+ "step": 156000
+ },
+ {
+ "epoch": 0.8051243157346211,
+ "grad_norm": 24041.384765625,
+ "learning_rate": 2.330187252800614e-05,
+ "loss": 0.4056,
+ "step": 156050
+ },
+ {
+ "epoch": 0.8053822857172339,
+ "grad_norm": 23452.453125,
+ "learning_rate": 2.327099311987848e-05,
+ "loss": 0.4071,
+ "step": 156100
+ },
+ {
+ "epoch": 0.8056402556998468,
+ "grad_norm": 23023.5859375,
+ "learning_rate": 2.3240127979177123e-05,
+ "loss": 0.4095,
+ "step": 156150
+ },
+ {
+ "epoch": 0.8058982256824596,
+ "grad_norm": 23684.615234375,
+ "learning_rate": 2.3209277122377255e-05,
+ "loss": 0.4023,
+ "step": 156200
+ },
+ {
+ "epoch": 0.8061561956650725,
+ "grad_norm": 22598.732421875,
+ "learning_rate": 2.31784405659465e-05,
+ "loss": 0.4013,
+ "step": 156250
+ },
+ {
+ "epoch": 0.8064141656476852,
+ "grad_norm": 21835.93359375,
+ "learning_rate": 2.3147618326344804e-05,
+ "loss": 0.4072,
+ "step": 156300
+ },
+ {
+ "epoch": 0.806672135630298,
+ "grad_norm": 26343.41015625,
+ "learning_rate": 2.311681042002448e-05,
+ "loss": 0.4154,
+ "step": 156350
+ },
+ {
+ "epoch": 0.8069301056129109,
+ "grad_norm": 24116.162109375,
+ "learning_rate": 2.3086016863430193e-05,
+ "loss": 0.4032,
+ "step": 156400
+ },
+ {
+ "epoch": 0.8071880755955237,
+ "grad_norm": 23874.53515625,
+ "learning_rate": 2.3055237672998946e-05,
+ "loss": 0.4063,
+ "step": 156450
+ },
+ {
+ "epoch": 0.8074460455781365,
+ "grad_norm": 25624.203125,
+ "learning_rate": 2.302447286516006e-05,
+ "loss": 0.4034,
+ "step": 156500
+ },
+ {
+ "epoch": 0.8077040155607493,
+ "grad_norm": 22652.2109375,
+ "learning_rate": 2.2993722456335236e-05,
+ "loss": 0.4049,
+ "step": 156550
+ },
+ {
+ "epoch": 0.8079619855433622,
+ "grad_norm": 26234.255859375,
+ "learning_rate": 2.2962986462938385e-05,
+ "loss": 0.4035,
+ "step": 156600
+ },
+ {
+ "epoch": 0.808219955525975,
+ "grad_norm": 24374.974609375,
+ "learning_rate": 2.293226490137584e-05,
+ "loss": 0.4052,
+ "step": 156650
+ },
+ {
+ "epoch": 0.8084779255085878,
+ "grad_norm": 24195.4296875,
+ "learning_rate": 2.2901557788046146e-05,
+ "loss": 0.4072,
+ "step": 156700
+ },
+ {
+ "epoch": 0.8087358954912006,
+ "grad_norm": 24590.525390625,
+ "learning_rate": 2.2870865139340165e-05,
+ "loss": 0.4092,
+ "step": 156750
+ },
+ {
+ "epoch": 0.8089938654738135,
+ "grad_norm": 20863.509765625,
+ "learning_rate": 2.2840186971641083e-05,
+ "loss": 0.4073,
+ "step": 156800
+ },
+ {
+ "epoch": 0.8092518354564263,
+ "grad_norm": 23662.16015625,
+ "learning_rate": 2.2809523301324238e-05,
+ "loss": 0.4101,
+ "step": 156850
+ },
+ {
+ "epoch": 0.8095098054390392,
+ "grad_norm": 21700.666015625,
+ "learning_rate": 2.2778874144757357e-05,
+ "loss": 0.4075,
+ "step": 156900
+ },
+ {
+ "epoch": 0.8097677754216519,
+ "grad_norm": 29026.71484375,
+ "learning_rate": 2.274823951830036e-05,
+ "loss": 0.4005,
+ "step": 156950
+ },
+ {
+ "epoch": 0.8100257454042648,
+ "grad_norm": 27310.48828125,
+ "learning_rate": 2.2717619438305397e-05,
+ "loss": 0.4058,
+ "step": 157000
+ },
+ {
+ "epoch": 0.8102837153868776,
+ "grad_norm": 25008.673828125,
+ "learning_rate": 2.2687013921116895e-05,
+ "loss": 0.404,
+ "step": 157050
+ },
+ {
+ "epoch": 0.8105416853694904,
+ "grad_norm": 22623.57421875,
+ "learning_rate": 2.2656422983071452e-05,
+ "loss": 0.4059,
+ "step": 157100
+ },
+ {
+ "epoch": 0.8107996553521032,
+ "grad_norm": 23960.427734375,
+ "learning_rate": 2.2625846640497965e-05,
+ "loss": 0.4096,
+ "step": 157150
+ },
+ {
+ "epoch": 0.811057625334716,
+ "grad_norm": 22415.021484375,
+ "learning_rate": 2.2595284909717475e-05,
+ "loss": 0.4061,
+ "step": 157200
+ },
+ {
+ "epoch": 0.8113155953173289,
+ "grad_norm": 23358.822265625,
+ "learning_rate": 2.2564737807043233e-05,
+ "loss": 0.4003,
+ "step": 157250
+ },
+ {
+ "epoch": 0.8115735652999417,
+ "grad_norm": 21686.9765625,
+ "learning_rate": 2.2534205348780702e-05,
+ "loss": 0.4063,
+ "step": 157300
+ },
+ {
+ "epoch": 0.8118315352825545,
+ "grad_norm": 22949.484375,
+ "learning_rate": 2.2503687551227504e-05,
+ "loss": 0.407,
+ "step": 157350
+ },
+ {
+ "epoch": 0.8120895052651673,
+ "grad_norm": 21776.201171875,
+ "learning_rate": 2.2473184430673444e-05,
+ "loss": 0.4073,
+ "step": 157400
+ },
+ {
+ "epoch": 0.8123474752477802,
+ "grad_norm": 25641.17578125,
+ "learning_rate": 2.244269600340055e-05,
+ "loss": 0.4074,
+ "step": 157450
+ },
+ {
+ "epoch": 0.812605445230393,
+ "grad_norm": 22723.42578125,
+ "learning_rate": 2.2412222285682867e-05,
+ "loss": 0.4119,
+ "step": 157500
+ },
+ {
+ "epoch": 0.8128634152130059,
+ "grad_norm": 24244.48046875,
+ "learning_rate": 2.2381763293786746e-05,
+ "loss": 0.4157,
+ "step": 157550
+ },
+ {
+ "epoch": 0.8131213851956186,
+ "grad_norm": 26826.337890625,
+ "learning_rate": 2.235131904397058e-05,
+ "loss": 0.4102,
+ "step": 157600
+ },
+ {
+ "epoch": 0.8133793551782315,
+ "grad_norm": 23157.0546875,
+ "learning_rate": 2.232088955248491e-05,
+ "loss": 0.4121,
+ "step": 157650
+ },
+ {
+ "epoch": 0.8136373251608443,
+ "grad_norm": 23352.009765625,
+ "learning_rate": 2.229047483557245e-05,
+ "loss": 0.4054,
+ "step": 157700
+ },
+ {
+ "epoch": 0.8138952951434572,
+ "grad_norm": 24417.2734375,
+ "learning_rate": 2.2260074909467925e-05,
+ "loss": 0.4092,
+ "step": 157750
+ },
+ {
+ "epoch": 0.8141532651260699,
+ "grad_norm": 22345.669921875,
+ "learning_rate": 2.2229689790398283e-05,
+ "loss": 0.402,
+ "step": 157800
+ },
+ {
+ "epoch": 0.8144112351086827,
+ "grad_norm": 22904.20703125,
+ "learning_rate": 2.2199319494582492e-05,
+ "loss": 0.4067,
+ "step": 157850
+ },
+ {
+ "epoch": 0.8146692050912956,
+ "grad_norm": 24132.306640625,
+ "learning_rate": 2.216896403823162e-05,
+ "loss": 0.4094,
+ "step": 157900
+ },
+ {
+ "epoch": 0.8149271750739084,
+ "grad_norm": 24649.001953125,
+ "learning_rate": 2.2138623437548833e-05,
+ "loss": 0.4048,
+ "step": 157950
+ },
+ {
+ "epoch": 0.8151851450565212,
+ "grad_norm": 24956.458984375,
+ "learning_rate": 2.210829770872933e-05,
+ "loss": 0.4038,
+ "step": 158000
+ },
+ {
+ "epoch": 0.815443115039134,
+ "grad_norm": 24047.3515625,
+ "learning_rate": 2.2077986867960437e-05,
+ "loss": 0.407,
+ "step": 158050
+ },
+ {
+ "epoch": 0.8157010850217469,
+ "grad_norm": 22895.953125,
+ "learning_rate": 2.2047690931421476e-05,
+ "loss": 0.4033,
+ "step": 158100
+ },
+ {
+ "epoch": 0.8159590550043597,
+ "grad_norm": 22524.640625,
+ "learning_rate": 2.201740991528383e-05,
+ "loss": 0.4136,
+ "step": 158150
+ },
+ {
+ "epoch": 0.8162170249869725,
+ "grad_norm": 22507.46875,
+ "learning_rate": 2.1987143835710928e-05,
+ "loss": 0.4043,
+ "step": 158200
+ },
+ {
+ "epoch": 0.8164749949695853,
+ "grad_norm": 24044.5390625,
+ "learning_rate": 2.1956892708858202e-05,
+ "loss": 0.4099,
+ "step": 158250
+ },
+ {
+ "epoch": 0.8167329649521982,
+ "grad_norm": 26112.05859375,
+ "learning_rate": 2.1926656550873103e-05,
+ "loss": 0.4087,
+ "step": 158300
+ },
+ {
+ "epoch": 0.816990934934811,
+ "grad_norm": 25168.59375,
+ "learning_rate": 2.189643537789517e-05,
+ "loss": 0.4059,
+ "step": 158350
+ },
+ {
+ "epoch": 0.8172489049174239,
+ "grad_norm": 31289.392578125,
+ "learning_rate": 2.1866229206055804e-05,
+ "loss": 0.4048,
+ "step": 158400
+ },
+ {
+ "epoch": 0.8175068749000366,
+ "grad_norm": 27301.970703125,
+ "learning_rate": 2.1836038051478508e-05,
+ "loss": 0.4111,
+ "step": 158450
+ },
+ {
+ "epoch": 0.8177648448826494,
+ "grad_norm": 22742.66015625,
+ "learning_rate": 2.180586193027877e-05,
+ "loss": 0.3998,
+ "step": 158500
+ },
+ {
+ "epoch": 0.8180228148652623,
+ "grad_norm": 26745.51171875,
+ "learning_rate": 2.177570085856395e-05,
+ "loss": 0.4069,
+ "step": 158550
+ },
+ {
+ "epoch": 0.8182807848478751,
+ "grad_norm": 24821.93359375,
+ "learning_rate": 2.1745554852433502e-05,
+ "loss": 0.4057,
+ "step": 158600
+ },
+ {
+ "epoch": 0.8185387548304879,
+ "grad_norm": 24082.908203125,
+ "learning_rate": 2.1715423927978755e-05,
+ "loss": 0.4042,
+ "step": 158650
+ },
+ {
+ "epoch": 0.8187967248131007,
+ "grad_norm": 23584.001953125,
+ "learning_rate": 2.168530810128302e-05,
+ "loss": 0.4062,
+ "step": 158700
+ },
+ {
+ "epoch": 0.8190546947957136,
+ "grad_norm": 25795.326171875,
+ "learning_rate": 2.1655207388421532e-05,
+ "loss": 0.4101,
+ "step": 158750
+ },
+ {
+ "epoch": 0.8193126647783264,
+ "grad_norm": 22298.908203125,
+ "learning_rate": 2.1625121805461483e-05,
+ "loss": 0.4004,
+ "step": 158800
+ },
+ {
+ "epoch": 0.8195706347609392,
+ "grad_norm": 24439.970703125,
+ "learning_rate": 2.1595051368461943e-05,
+ "loss": 0.4078,
+ "step": 158850
+ },
+ {
+ "epoch": 0.819828604743552,
+ "grad_norm": 24895.5546875,
+ "learning_rate": 2.1564996093473975e-05,
+ "loss": 0.4008,
+ "step": 158900
+ },
+ {
+ "epoch": 0.8200865747261649,
+ "grad_norm": 27615.1171875,
+ "learning_rate": 2.153495599654048e-05,
+ "loss": 0.4051,
+ "step": 158950
+ },
+ {
+ "epoch": 0.8203445447087777,
+ "grad_norm": 22537.25390625,
+ "learning_rate": 2.150493109369628e-05,
+ "loss": 0.4078,
+ "step": 159000
+ },
+ {
+ "epoch": 0.8206025146913906,
+ "grad_norm": 23422.39453125,
+ "learning_rate": 2.1474921400968085e-05,
+ "loss": 0.3999,
+ "step": 159050
+ },
+ {
+ "epoch": 0.8208604846740033,
+ "grad_norm": 24678.099609375,
+ "learning_rate": 2.1444926934374475e-05,
+ "loss": 0.4038,
+ "step": 159100
+ },
+ {
+ "epoch": 0.8211184546566161,
+ "grad_norm": 25680.623046875,
+ "learning_rate": 2.1414947709925963e-05,
+ "loss": 0.4082,
+ "step": 159150
+ },
+ {
+ "epoch": 0.821376424639229,
+ "grad_norm": 26526.724609375,
+ "learning_rate": 2.1384983743624813e-05,
+ "loss": 0.4076,
+ "step": 159200
+ },
+ {
+ "epoch": 0.8216343946218418,
+ "grad_norm": 21391.701171875,
+ "learning_rate": 2.1355035051465265e-05,
+ "loss": 0.4003,
+ "step": 159250
+ },
+ {
+ "epoch": 0.8218923646044546,
+ "grad_norm": 22676.607421875,
+ "learning_rate": 2.1325101649433327e-05,
+ "loss": 0.4087,
+ "step": 159300
+ },
+ {
+ "epoch": 0.8221503345870674,
+ "grad_norm": 23139.802734375,
+ "learning_rate": 2.1295183553506855e-05,
+ "loss": 0.4102,
+ "step": 159350
+ },
+ {
+ "epoch": 0.8224083045696803,
+ "grad_norm": 23598.369140625,
+ "learning_rate": 2.1265280779655593e-05,
+ "loss": 0.4027,
+ "step": 159400
+ },
+ {
+ "epoch": 0.8226662745522931,
+ "grad_norm": 24068.453125,
+ "learning_rate": 2.1235393343841008e-05,
+ "loss": 0.4097,
+ "step": 159450
+ },
+ {
+ "epoch": 0.8229242445349059,
+ "grad_norm": 26833.779296875,
+ "learning_rate": 2.1205521262016476e-05,
+ "loss": 0.4094,
+ "step": 159500
+ },
+ {
+ "epoch": 0.8231822145175187,
+ "grad_norm": 21122.98046875,
+ "learning_rate": 2.1175664550127123e-05,
+ "loss": 0.4074,
+ "step": 159550
+ },
+ {
+ "epoch": 0.8234401845001316,
+ "grad_norm": 24398.310546875,
+ "learning_rate": 2.1145823224109884e-05,
+ "loss": 0.4081,
+ "step": 159600
+ },
+ {
+ "epoch": 0.8236981544827444,
+ "grad_norm": 20830.05078125,
+ "learning_rate": 2.111599729989348e-05,
+ "loss": 0.4031,
+ "step": 159650
+ },
+ {
+ "epoch": 0.8239561244653573,
+ "grad_norm": 24353.29296875,
+ "learning_rate": 2.108618679339841e-05,
+ "loss": 0.4037,
+ "step": 159700
+ },
+ {
+ "epoch": 0.82421409444797,
+ "grad_norm": 22828.130859375,
+ "learning_rate": 2.1056391720536928e-05,
+ "loss": 0.4021,
+ "step": 159750
+ },
+ {
+ "epoch": 0.8244720644305829,
+ "grad_norm": 21661.53515625,
+ "learning_rate": 2.1026612097213106e-05,
+ "loss": 0.4117,
+ "step": 159800
+ },
+ {
+ "epoch": 0.8247300344131957,
+ "grad_norm": 20191.279296875,
+ "learning_rate": 2.0996847939322707e-05,
+ "loss": 0.4088,
+ "step": 159850
+ },
+ {
+ "epoch": 0.8249880043958085,
+ "grad_norm": 23767.8125,
+ "learning_rate": 2.0967099262753258e-05,
+ "loss": 0.4035,
+ "step": 159900
+ },
+ {
+ "epoch": 0.8252459743784213,
+ "grad_norm": 24693.4609375,
+ "learning_rate": 2.093736608338405e-05,
+ "loss": 0.4135,
+ "step": 159950
+ },
+ {
+ "epoch": 0.8255039443610341,
+ "grad_norm": 22759.341796875,
+ "learning_rate": 2.0907648417086027e-05,
+ "loss": 0.4048,
+ "step": 160000
+ },
+ {
+ "epoch": 0.8255039443610341,
+ "eval_loss": 0.3925068974494934,
+ "eval_runtime": 3187.046,
+ "eval_samples_per_second": 973.039,
+ "eval_steps_per_second": 1.901,
+ "step": 160000
+ },
+ {
+ "epoch": 0.825761914343647,
+ "grad_norm": 25066.45703125,
+ "learning_rate": 2.0877946279721983e-05,
+ "loss": 0.4017,
+ "step": 160050
+ },
+ {
+ "epoch": 0.8260198843262598,
+ "grad_norm": 24734.384765625,
+ "learning_rate": 2.084825968714626e-05,
+ "loss": 0.4091,
+ "step": 160100
+ },
+ {
+ "epoch": 0.8262778543088726,
+ "grad_norm": 26498.201171875,
+ "learning_rate": 2.0818588655205045e-05,
+ "loss": 0.4028,
+ "step": 160150
+ },
+ {
+ "epoch": 0.8265358242914854,
+ "grad_norm": 23436.36328125,
+ "learning_rate": 2.0788933199736143e-05,
+ "loss": 0.4019,
+ "step": 160200
+ },
+ {
+ "epoch": 0.8267937942740983,
+ "grad_norm": 23851.89453125,
+ "learning_rate": 2.075929333656904e-05,
+ "loss": 0.4055,
+ "step": 160250
+ },
+ {
+ "epoch": 0.8270517642567111,
+ "grad_norm": 23416.0625,
+ "learning_rate": 2.0729669081524977e-05,
+ "loss": 0.4075,
+ "step": 160300
+ },
+ {
+ "epoch": 0.8273097342393239,
+ "grad_norm": 22208.994140625,
+ "learning_rate": 2.070006045041673e-05,
+ "loss": 0.4047,
+ "step": 160350
+ },
+ {
+ "epoch": 0.8275677042219367,
+ "grad_norm": 21291.3515625,
+ "learning_rate": 2.067046745904888e-05,
+ "loss": 0.405,
+ "step": 160400
+ },
+ {
+ "epoch": 0.8278256742045496,
+ "grad_norm": 24646.279296875,
+ "learning_rate": 2.0640890123217565e-05,
+ "loss": 0.4076,
+ "step": 160450
+ },
+ {
+ "epoch": 0.8280836441871624,
+ "grad_norm": 22018.609375,
+ "learning_rate": 2.0611328458710595e-05,
+ "loss": 0.406,
+ "step": 160500
+ },
+ {
+ "epoch": 0.8283416141697753,
+ "grad_norm": 30070.40234375,
+ "learning_rate": 2.0581782481307415e-05,
+ "loss": 0.4099,
+ "step": 160550
+ },
+ {
+ "epoch": 0.828599584152388,
+ "grad_norm": 24574.34375,
+ "learning_rate": 2.0552252206779098e-05,
+ "loss": 0.4035,
+ "step": 160600
+ },
+ {
+ "epoch": 0.8288575541350008,
+ "grad_norm": 23137.224609375,
+ "learning_rate": 2.0522737650888313e-05,
+ "loss": 0.4006,
+ "step": 160650
+ },
+ {
+ "epoch": 0.8291155241176137,
+ "grad_norm": 22633.23828125,
+ "learning_rate": 2.0493238829389393e-05,
+ "loss": 0.4064,
+ "step": 160700
+ },
+ {
+ "epoch": 0.8293734941002265,
+ "grad_norm": 23670.525390625,
+ "learning_rate": 2.046375575802822e-05,
+ "loss": 0.4084,
+ "step": 160750
+ },
+ {
+ "epoch": 0.8296314640828393,
+ "grad_norm": 24236.7890625,
+ "learning_rate": 2.043428845254229e-05,
+ "loss": 0.413,
+ "step": 160800
+ },
+ {
+ "epoch": 0.8298894340654521,
+ "grad_norm": 25734.12890625,
+ "learning_rate": 2.0404836928660676e-05,
+ "loss": 0.3992,
+ "step": 160850
+ },
+ {
+ "epoch": 0.830147404048065,
+ "grad_norm": 23417.83203125,
+ "learning_rate": 2.037540120210401e-05,
+ "loss": 0.4069,
+ "step": 160900
+ },
+ {
+ "epoch": 0.8304053740306778,
+ "grad_norm": 24619.853515625,
+ "learning_rate": 2.0345981288584575e-05,
+ "loss": 0.4002,
+ "step": 160950
+ },
+ {
+ "epoch": 0.8306633440132906,
+ "grad_norm": 21862.111328125,
+ "learning_rate": 2.031657720380608e-05,
+ "loss": 0.4012,
+ "step": 161000
+ },
+ {
+ "epoch": 0.8309213139959034,
+ "grad_norm": 23347.91015625,
+ "learning_rate": 2.0287188963463906e-05,
+ "loss": 0.4061,
+ "step": 161050
+ },
+ {
+ "epoch": 0.8311792839785163,
+ "grad_norm": 25119.107421875,
+ "learning_rate": 2.02578165832449e-05,
+ "loss": 0.4061,
+ "step": 161100
+ },
+ {
+ "epoch": 0.8314372539611291,
+ "grad_norm": 22684.50390625,
+ "learning_rate": 2.0228460078827466e-05,
+ "loss": 0.4062,
+ "step": 161150
+ },
+ {
+ "epoch": 0.831695223943742,
+ "grad_norm": 39309.30859375,
+ "learning_rate": 2.0199119465881565e-05,
+ "loss": 0.4091,
+ "step": 161200
+ },
+ {
+ "epoch": 0.8319531939263547,
+ "grad_norm": 22076.8125,
+ "learning_rate": 2.0169794760068632e-05,
+ "loss": 0.4052,
+ "step": 161250
+ },
+ {
+ "epoch": 0.8322111639089675,
+ "grad_norm": 26682.44140625,
+ "learning_rate": 2.0140485977041636e-05,
+ "loss": 0.405,
+ "step": 161300
+ },
+ {
+ "epoch": 0.8324691338915804,
+ "grad_norm": 24586.09375,
+ "learning_rate": 2.011119313244502e-05,
+ "loss": 0.4066,
+ "step": 161350
+ },
+ {
+ "epoch": 0.8327271038741932,
+ "grad_norm": 26363.5703125,
+ "learning_rate": 2.008191624191475e-05,
+ "loss": 0.4027,
+ "step": 161400
+ },
+ {
+ "epoch": 0.832985073856806,
+ "grad_norm": 24361.9921875,
+ "learning_rate": 2.0052655321078246e-05,
+ "loss": 0.4041,
+ "step": 161450
+ },
+ {
+ "epoch": 0.8332430438394188,
+ "grad_norm": 22026.951171875,
+ "learning_rate": 2.0023410385554466e-05,
+ "loss": 0.4068,
+ "step": 161500
+ },
+ {
+ "epoch": 0.8335010138220317,
+ "grad_norm": 24540.068359375,
+ "learning_rate": 1.9994181450953725e-05,
+ "loss": 0.4036,
+ "step": 161550
+ },
+ {
+ "epoch": 0.8337589838046445,
+ "grad_norm": 25837.857421875,
+ "learning_rate": 1.9964968532877916e-05,
+ "loss": 0.4052,
+ "step": 161600
+ },
+ {
+ "epoch": 0.8340169537872573,
+ "grad_norm": 23252.900390625,
+ "learning_rate": 1.993577164692031e-05,
+ "loss": 0.4021,
+ "step": 161650
+ },
+ {
+ "epoch": 0.8342749237698701,
+ "grad_norm": 25305.177734375,
+ "learning_rate": 1.990659080866562e-05,
+ "loss": 0.4089,
+ "step": 161700
+ },
+ {
+ "epoch": 0.834532893752483,
+ "grad_norm": 25317.89453125,
+ "learning_rate": 1.9877426033690066e-05,
+ "loss": 0.4082,
+ "step": 161750
+ },
+ {
+ "epoch": 0.8347908637350958,
+ "grad_norm": 25872.2109375,
+ "learning_rate": 1.984827733756117e-05,
+ "loss": 0.4021,
+ "step": 161800
+ },
+ {
+ "epoch": 0.8350488337177087,
+ "grad_norm": 23915.955078125,
+ "learning_rate": 1.9819144735837998e-05,
+ "loss": 0.4054,
+ "step": 161850
+ },
+ {
+ "epoch": 0.8353068037003214,
+ "grad_norm": 25145.380859375,
+ "learning_rate": 1.9790028244070946e-05,
+ "loss": 0.4119,
+ "step": 161900
+ },
+ {
+ "epoch": 0.8355647736829342,
+ "grad_norm": 24318.28125,
+ "learning_rate": 1.976092787780184e-05,
+ "loss": 0.4015,
+ "step": 161950
+ },
+ {
+ "epoch": 0.8358227436655471,
+ "grad_norm": 22675.845703125,
+ "learning_rate": 1.973184365256388e-05,
+ "loss": 0.4107,
+ "step": 162000
+ },
+ {
+ "epoch": 0.83608071364816,
+ "grad_norm": 23785.451171875,
+ "learning_rate": 1.9702775583881656e-05,
+ "loss": 0.408,
+ "step": 162050
+ },
+ {
+ "epoch": 0.8363386836307727,
+ "grad_norm": 22790.47265625,
+ "learning_rate": 1.9673723687271174e-05,
+ "loss": 0.406,
+ "step": 162100
+ },
+ {
+ "epoch": 0.8365966536133855,
+ "grad_norm": 24380.498046875,
+ "learning_rate": 1.9644687978239746e-05,
+ "loss": 0.4105,
+ "step": 162150
+ },
+ {
+ "epoch": 0.8368546235959984,
+ "grad_norm": 23812.814453125,
+ "learning_rate": 1.9615668472286085e-05,
+ "loss": 0.4032,
+ "step": 162200
+ },
+ {
+ "epoch": 0.8371125935786112,
+ "grad_norm": 22820.734375,
+ "learning_rate": 1.9586665184900232e-05,
+ "loss": 0.4072,
+ "step": 162250
+ },
+ {
+ "epoch": 0.837370563561224,
+ "grad_norm": 22347.779296875,
+ "learning_rate": 1.955767813156359e-05,
+ "loss": 0.4045,
+ "step": 162300
+ },
+ {
+ "epoch": 0.8376285335438368,
+ "grad_norm": 24328.546875,
+ "learning_rate": 1.9528707327748852e-05,
+ "loss": 0.4097,
+ "step": 162350
+ },
+ {
+ "epoch": 0.8378865035264497,
+ "grad_norm": 23850.13671875,
+ "learning_rate": 1.9499752788920146e-05,
+ "loss": 0.4085,
+ "step": 162400
+ },
+ {
+ "epoch": 0.8381444735090625,
+ "grad_norm": 24967.3203125,
+ "learning_rate": 1.9470814530532756e-05,
+ "loss": 0.4056,
+ "step": 162450
+ },
+ {
+ "epoch": 0.8384024434916753,
+ "grad_norm": 23740.197265625,
+ "learning_rate": 1.9441892568033426e-05,
+ "loss": 0.4112,
+ "step": 162500
+ },
+ {
+ "epoch": 0.8386604134742881,
+ "grad_norm": 26039.447265625,
+ "learning_rate": 1.941298691686012e-05,
+ "loss": 0.405,
+ "step": 162550
+ },
+ {
+ "epoch": 0.838918383456901,
+ "grad_norm": 22781.23828125,
+ "learning_rate": 1.9384097592442102e-05,
+ "loss": 0.4043,
+ "step": 162600
+ },
+ {
+ "epoch": 0.8391763534395138,
+ "grad_norm": 25735.17578125,
+ "learning_rate": 1.935522461019998e-05,
+ "loss": 0.4021,
+ "step": 162650
+ },
+ {
+ "epoch": 0.8394343234221266,
+ "grad_norm": 26452.810546875,
+ "learning_rate": 1.932636798554552e-05,
+ "loss": 0.4093,
+ "step": 162700
+ },
+ {
+ "epoch": 0.8396922934047394,
+ "grad_norm": 24199.3515625,
+ "learning_rate": 1.929752773388189e-05,
+ "loss": 0.4003,
+ "step": 162750
+ },
+ {
+ "epoch": 0.8399502633873522,
+ "grad_norm": 27610.30859375,
+ "learning_rate": 1.9268703870603434e-05,
+ "loss": 0.4035,
+ "step": 162800
+ },
+ {
+ "epoch": 0.8402082333699651,
+ "grad_norm": 23799.3359375,
+ "learning_rate": 1.9239896411095777e-05,
+ "loss": 0.4072,
+ "step": 162850
+ },
+ {
+ "epoch": 0.8404662033525779,
+ "grad_norm": 24182.162109375,
+ "learning_rate": 1.9211105370735784e-05,
+ "loss": 0.4056,
+ "step": 162900
+ },
+ {
+ "epoch": 0.8407241733351907,
+ "grad_norm": 21251.0625,
+ "learning_rate": 1.918233076489153e-05,
+ "loss": 0.4073,
+ "step": 162950
+ },
+ {
+ "epoch": 0.8409821433178035,
+ "grad_norm": 22723.09765625,
+ "learning_rate": 1.9153572608922383e-05,
+ "loss": 0.4041,
+ "step": 163000
+ },
+ {
+ "epoch": 0.8412401133004164,
+ "grad_norm": 23557.125,
+ "learning_rate": 1.9124830918178876e-05,
+ "loss": 0.4064,
+ "step": 163050
+ },
+ {
+ "epoch": 0.8414980832830292,
+ "grad_norm": 24273.71484375,
+ "learning_rate": 1.9096105708002754e-05,
+ "loss": 0.4072,
+ "step": 163100
+ },
+ {
+ "epoch": 0.841756053265642,
+ "grad_norm": 24078.10546875,
+ "learning_rate": 1.9067396993726994e-05,
+ "loss": 0.409,
+ "step": 163150
+ },
+ {
+ "epoch": 0.8420140232482548,
+ "grad_norm": 23370.31640625,
+ "learning_rate": 1.9038704790675738e-05,
+ "loss": 0.4082,
+ "step": 163200
+ },
+ {
+ "epoch": 0.8422719932308677,
+ "grad_norm": 23478.564453125,
+ "learning_rate": 1.901002911416432e-05,
+ "loss": 0.4082,
+ "step": 163250
+ },
+ {
+ "epoch": 0.8425299632134805,
+ "grad_norm": 22697.802734375,
+ "learning_rate": 1.898136997949929e-05,
+ "loss": 0.4107,
+ "step": 163300
+ },
+ {
+ "epoch": 0.8427879331960934,
+ "grad_norm": 25571.9765625,
+ "learning_rate": 1.8952727401978326e-05,
+ "loss": 0.3996,
+ "step": 163350
+ },
+ {
+ "epoch": 0.8430459031787061,
+ "grad_norm": 24950.283203125,
+ "learning_rate": 1.8924101396890264e-05,
+ "loss": 0.403,
+ "step": 163400
+ },
+ {
+ "epoch": 0.8433038731613189,
+ "grad_norm": 22436.380859375,
+ "learning_rate": 1.8895491979515162e-05,
+ "loss": 0.4041,
+ "step": 163450
+ },
+ {
+ "epoch": 0.8435618431439318,
+ "grad_norm": 25954.529296875,
+ "learning_rate": 1.8866899165124097e-05,
+ "loss": 0.4003,
+ "step": 163500
+ },
+ {
+ "epoch": 0.8438198131265446,
+ "grad_norm": 21477.8828125,
+ "learning_rate": 1.883832296897944e-05,
+ "loss": 0.4063,
+ "step": 163550
+ },
+ {
+ "epoch": 0.8440777831091574,
+ "grad_norm": 24669.7890625,
+ "learning_rate": 1.8809763406334535e-05,
+ "loss": 0.4049,
+ "step": 163600
+ },
+ {
+ "epoch": 0.8443357530917702,
+ "grad_norm": 27181.50390625,
+ "learning_rate": 1.878122049243398e-05,
+ "loss": 0.4007,
+ "step": 163650
+ },
+ {
+ "epoch": 0.8445937230743831,
+ "grad_norm": 25191.591796875,
+ "learning_rate": 1.8752694242513408e-05,
+ "loss": 0.4072,
+ "step": 163700
+ },
+ {
+ "epoch": 0.8448516930569959,
+ "grad_norm": 24557.42578125,
+ "learning_rate": 1.872418467179956e-05,
+ "loss": 0.4043,
+ "step": 163750
+ },
+ {
+ "epoch": 0.8451096630396087,
+ "grad_norm": 25135.6328125,
+ "learning_rate": 1.8695691795510335e-05,
+ "loss": 0.4008,
+ "step": 163800
+ },
+ {
+ "epoch": 0.8453676330222215,
+ "grad_norm": 23372.181640625,
+ "learning_rate": 1.8667215628854656e-05,
+ "loss": 0.4073,
+ "step": 163850
+ },
+ {
+ "epoch": 0.8456256030048344,
+ "grad_norm": 23332.65625,
+ "learning_rate": 1.8638756187032554e-05,
+ "loss": 0.3987,
+ "step": 163900
+ },
+ {
+ "epoch": 0.8458835729874472,
+ "grad_norm": 23423.669921875,
+ "learning_rate": 1.861031348523512e-05,
+ "loss": 0.4066,
+ "step": 163950
+ },
+ {
+ "epoch": 0.8461415429700601,
+ "grad_norm": 25873.208984375,
+ "learning_rate": 1.858188753864452e-05,
+ "loss": 0.4015,
+ "step": 164000
+ },
+ {
+ "epoch": 0.8463995129526728,
+ "grad_norm": 24766.4140625,
+ "learning_rate": 1.8553478362433964e-05,
+ "loss": 0.4076,
+ "step": 164050
+ },
+ {
+ "epoch": 0.8466574829352856,
+ "grad_norm": 25044.45703125,
+ "learning_rate": 1.852508597176776e-05,
+ "loss": 0.3972,
+ "step": 164100
+ },
+ {
+ "epoch": 0.8469154529178985,
+ "grad_norm": 23699.478515625,
+ "learning_rate": 1.8496710381801157e-05,
+ "loss": 0.3953,
+ "step": 164150
+ },
+ {
+ "epoch": 0.8471734229005113,
+ "grad_norm": 22853.53125,
+ "learning_rate": 1.8468351607680546e-05,
+ "loss": 0.4095,
+ "step": 164200
+ },
+ {
+ "epoch": 0.8474313928831241,
+ "grad_norm": 21374.96875,
+ "learning_rate": 1.8440009664543267e-05,
+ "loss": 0.4092,
+ "step": 164250
+ },
+ {
+ "epoch": 0.8476893628657369,
+ "grad_norm": 22454.515625,
+ "learning_rate": 1.8411684567517694e-05,
+ "loss": 0.4005,
+ "step": 164300
+ },
+ {
+ "epoch": 0.8479473328483498,
+ "grad_norm": 23134.24609375,
+ "learning_rate": 1.8383376331723258e-05,
+ "loss": 0.4041,
+ "step": 164350
+ },
+ {
+ "epoch": 0.8482053028309626,
+ "grad_norm": 23000.69921875,
+ "learning_rate": 1.835508497227028e-05,
+ "loss": 0.4056,
+ "step": 164400
+ },
+ {
+ "epoch": 0.8484632728135754,
+ "grad_norm": 23213.333984375,
+ "learning_rate": 1.8326810504260194e-05,
+ "loss": 0.4076,
+ "step": 164450
+ },
+ {
+ "epoch": 0.8487212427961882,
+ "grad_norm": 24883.953125,
+ "learning_rate": 1.8298552942785353e-05,
+ "loss": 0.4023,
+ "step": 164500
+ },
+ {
+ "epoch": 0.8489792127788011,
+ "grad_norm": 23075.015625,
+ "learning_rate": 1.827031230292908e-05,
+ "loss": 0.4095,
+ "step": 164550
+ },
+ {
+ "epoch": 0.8492371827614139,
+ "grad_norm": 24055.23828125,
+ "learning_rate": 1.824208859976569e-05,
+ "loss": 0.4034,
+ "step": 164600
+ },
+ {
+ "epoch": 0.8494951527440268,
+ "grad_norm": 24572.919921875,
+ "learning_rate": 1.8213881848360438e-05,
+ "loss": 0.4106,
+ "step": 164650
+ },
+ {
+ "epoch": 0.8497531227266395,
+ "grad_norm": 26111.40234375,
+ "learning_rate": 1.8185692063769566e-05,
+ "loss": 0.4051,
+ "step": 164700
+ },
+ {
+ "epoch": 0.8500110927092523,
+ "grad_norm": 22763.25,
+ "learning_rate": 1.8157519261040222e-05,
+ "loss": 0.4019,
+ "step": 164750
+ },
+ {
+ "epoch": 0.8502690626918652,
+ "grad_norm": 22230.16796875,
+ "learning_rate": 1.8129363455210503e-05,
+ "loss": 0.4085,
+ "step": 164800
+ },
+ {
+ "epoch": 0.850527032674478,
+ "grad_norm": 24729.40234375,
+ "learning_rate": 1.8101224661309435e-05,
+ "loss": 0.4042,
+ "step": 164850
+ },
+ {
+ "epoch": 0.8507850026570908,
+ "grad_norm": 23329.431640625,
+ "learning_rate": 1.807310289435696e-05,
+ "loss": 0.405,
+ "step": 164900
+ },
+ {
+ "epoch": 0.8510429726397036,
+ "grad_norm": 24267.970703125,
+ "learning_rate": 1.8044998169363908e-05,
+ "loss": 0.406,
+ "step": 164950
+ },
+ {
+ "epoch": 0.8513009426223165,
+ "grad_norm": 23587.689453125,
+ "learning_rate": 1.80169105013321e-05,
+ "loss": 0.4069,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8513009426223165,
+ "eval_loss": 0.3912332057952881,
+ "eval_runtime": 3189.1337,
+ "eval_samples_per_second": 972.402,
+ "eval_steps_per_second": 1.899,
+ "step": 165000
+ },
+ {
+ "epoch": 0.8515589126049293,
+ "grad_norm": 23356.634765625,
+ "learning_rate": 1.798883990525412e-05,
+ "loss": 0.4022,
+ "step": 165050
+ },
+ {
+ "epoch": 0.8518168825875421,
+ "grad_norm": 23850.75,
+ "learning_rate": 1.7960786396113542e-05,
+ "loss": 0.3984,
+ "step": 165100
+ },
+ {
+ "epoch": 0.8520748525701549,
+ "grad_norm": 23898.03125,
+ "learning_rate": 1.7932749988884795e-05,
+ "loss": 0.4035,
+ "step": 165150
+ },
+ {
+ "epoch": 0.8523328225527678,
+ "grad_norm": 23517.4453125,
+ "learning_rate": 1.790473069853314e-05,
+ "loss": 0.4061,
+ "step": 165200
+ },
+ {
+ "epoch": 0.8525907925353806,
+ "grad_norm": 24264.568359375,
+ "learning_rate": 1.787672854001478e-05,
+ "loss": 0.4076,
+ "step": 165250
+ },
+ {
+ "epoch": 0.8528487625179934,
+ "grad_norm": 23741.220703125,
+ "learning_rate": 1.7848743528276663e-05,
+ "loss": 0.4063,
+ "step": 165300
+ },
+ {
+ "epoch": 0.8531067325006062,
+ "grad_norm": 25368.697265625,
+ "learning_rate": 1.782077567825669e-05,
+ "loss": 0.4027,
+ "step": 165350
+ },
+ {
+ "epoch": 0.853364702483219,
+ "grad_norm": 21610.12890625,
+ "learning_rate": 1.779282500488355e-05,
+ "loss": 0.4067,
+ "step": 165400
+ },
+ {
+ "epoch": 0.8536226724658319,
+ "grad_norm": 26066.560546875,
+ "learning_rate": 1.7764891523076766e-05,
+ "loss": 0.4091,
+ "step": 165450
+ },
+ {
+ "epoch": 0.8538806424484447,
+ "grad_norm": 22909.5234375,
+ "learning_rate": 1.773697524774669e-05,
+ "loss": 0.4035,
+ "step": 165500
+ },
+ {
+ "epoch": 0.8541386124310575,
+ "grad_norm": 23672.54296875,
+ "learning_rate": 1.7709076193794478e-05,
+ "loss": 0.407,
+ "step": 165550
+ },
+ {
+ "epoch": 0.8543965824136703,
+ "grad_norm": 22466.203125,
+ "learning_rate": 1.7681194376112125e-05,
+ "loss": 0.4057,
+ "step": 165600
+ },
+ {
+ "epoch": 0.8546545523962832,
+ "grad_norm": 23236.4296875,
+ "learning_rate": 1.7653329809582404e-05,
+ "loss": 0.4058,
+ "step": 165650
+ },
+ {
+ "epoch": 0.854912522378896,
+ "grad_norm": 23181.5,
+ "learning_rate": 1.7625482509078873e-05,
+ "loss": 0.4007,
+ "step": 165700
+ },
+ {
+ "epoch": 0.8551704923615088,
+ "grad_norm": 20621.5,
+ "learning_rate": 1.7597652489465877e-05,
+ "loss": 0.4053,
+ "step": 165750
+ },
+ {
+ "epoch": 0.8554284623441216,
+ "grad_norm": 23911.7734375,
+ "learning_rate": 1.756983976559855e-05,
+ "loss": 0.4043,
+ "step": 165800
+ },
+ {
+ "epoch": 0.8556864323267345,
+ "grad_norm": 21440.978515625,
+ "learning_rate": 1.7542044352322768e-05,
+ "loss": 0.4076,
+ "step": 165850
+ },
+ {
+ "epoch": 0.8559444023093473,
+ "grad_norm": 22439.712890625,
+ "learning_rate": 1.7514266264475233e-05,
+ "loss": 0.3999,
+ "step": 165900
+ },
+ {
+ "epoch": 0.8562023722919601,
+ "grad_norm": 24814.876953125,
+ "learning_rate": 1.748650551688328e-05,
+ "loss": 0.405,
+ "step": 165950
+ },
+ {
+ "epoch": 0.8564603422745729,
+ "grad_norm": 21705.185546875,
+ "learning_rate": 1.7458762124365096e-05,
+ "loss": 0.4007,
+ "step": 166000
+ },
+ {
+ "epoch": 0.8567183122571858,
+ "grad_norm": 25317.05078125,
+ "learning_rate": 1.7431036101729604e-05,
+ "loss": 0.4036,
+ "step": 166050
+ },
+ {
+ "epoch": 0.8569762822397986,
+ "grad_norm": 23984.142578125,
+ "learning_rate": 1.7403327463776343e-05,
+ "loss": 0.4027,
+ "step": 166100
+ },
+ {
+ "epoch": 0.8572342522224115,
+ "grad_norm": 24149.794921875,
+ "learning_rate": 1.7375636225295716e-05,
+ "loss": 0.3986,
+ "step": 166150
+ },
+ {
+ "epoch": 0.8574922222050242,
+ "grad_norm": 20085.748046875,
+ "learning_rate": 1.73479624010687e-05,
+ "loss": 0.4032,
+ "step": 166200
+ },
+ {
+ "epoch": 0.857750192187637,
+ "grad_norm": 25550.01171875,
+ "learning_rate": 1.732030600586711e-05,
+ "loss": 0.4067,
+ "step": 166250
+ },
+ {
+ "epoch": 0.8580081621702499,
+ "grad_norm": 23439.69921875,
+ "learning_rate": 1.7292667054453364e-05,
+ "loss": 0.4058,
+ "step": 166300
+ },
+ {
+ "epoch": 0.8582661321528627,
+ "grad_norm": 24064.46484375,
+ "learning_rate": 1.7265045561580606e-05,
+ "loss": 0.406,
+ "step": 166350
+ },
+ {
+ "epoch": 0.8585241021354755,
+ "grad_norm": 27679.162109375,
+ "learning_rate": 1.723744154199264e-05,
+ "loss": 0.403,
+ "step": 166400
+ },
+ {
+ "epoch": 0.8587820721180883,
+ "grad_norm": 21371.59765625,
+ "learning_rate": 1.7209855010423977e-05,
+ "loss": 0.4103,
+ "step": 166450
+ },
+ {
+ "epoch": 0.8590400421007012,
+ "grad_norm": 24340.283203125,
+ "learning_rate": 1.7182285981599766e-05,
+ "loss": 0.4073,
+ "step": 166500
+ },
+ {
+ "epoch": 0.859298012083314,
+ "grad_norm": 22603.62109375,
+ "learning_rate": 1.7154734470235823e-05,
+ "loss": 0.4026,
+ "step": 166550
+ },
+ {
+ "epoch": 0.8595559820659268,
+ "grad_norm": 21442.248046875,
+ "learning_rate": 1.7127200491038607e-05,
+ "loss": 0.4089,
+ "step": 166600
+ },
+ {
+ "epoch": 0.8598139520485396,
+ "grad_norm": 22127.478515625,
+ "learning_rate": 1.7099684058705212e-05,
+ "loss": 0.4073,
+ "step": 166650
+ },
+ {
+ "epoch": 0.8600719220311525,
+ "grad_norm": 37660.0859375,
+ "learning_rate": 1.707218518792342e-05,
+ "loss": 0.404,
+ "step": 166700
+ },
+ {
+ "epoch": 0.8603298920137653,
+ "grad_norm": 23772.982421875,
+ "learning_rate": 1.704470389337153e-05,
+ "loss": 0.4004,
+ "step": 166750
+ },
+ {
+ "epoch": 0.8605878619963782,
+ "grad_norm": 24957.23828125,
+ "learning_rate": 1.7017240189718575e-05,
+ "loss": 0.4025,
+ "step": 166800
+ },
+ {
+ "epoch": 0.8608458319789909,
+ "grad_norm": 25014.044921875,
+ "learning_rate": 1.6989794091624138e-05,
+ "loss": 0.4037,
+ "step": 166850
+ },
+ {
+ "epoch": 0.8611038019616037,
+ "grad_norm": 23370.162109375,
+ "learning_rate": 1.696236561373839e-05,
+ "loss": 0.4043,
+ "step": 166900
+ },
+ {
+ "epoch": 0.8613617719442166,
+ "grad_norm": 25212.830078125,
+ "learning_rate": 1.693495477070217e-05,
+ "loss": 0.3997,
+ "step": 166950
+ },
+ {
+ "epoch": 0.8616197419268294,
+ "grad_norm": 22828.701171875,
+ "learning_rate": 1.69075615771468e-05,
+ "loss": 0.4063,
+ "step": 167000
+ },
+ {
+ "epoch": 0.8618777119094422,
+ "grad_norm": 23862.4375,
+ "learning_rate": 1.6880186047694274e-05,
+ "loss": 0.4044,
+ "step": 167050
+ },
+ {
+ "epoch": 0.862135681892055,
+ "grad_norm": 25248.44140625,
+ "learning_rate": 1.685282819695711e-05,
+ "loss": 0.4072,
+ "step": 167100
+ },
+ {
+ "epoch": 0.8623936518746679,
+ "grad_norm": 24765.2421875,
+ "learning_rate": 1.68254880395384e-05,
+ "loss": 0.4055,
+ "step": 167150
+ },
+ {
+ "epoch": 0.8626516218572807,
+ "grad_norm": 22687.32421875,
+ "learning_rate": 1.6798165590031783e-05,
+ "loss": 0.4076,
+ "step": 167200
+ },
+ {
+ "epoch": 0.8629095918398935,
+ "grad_norm": 28427.16015625,
+ "learning_rate": 1.677086086302146e-05,
+ "loss": 0.3985,
+ "step": 167250
+ },
+ {
+ "epoch": 0.8631675618225063,
+ "grad_norm": 24114.146484375,
+ "learning_rate": 1.6743573873082147e-05,
+ "loss": 0.3993,
+ "step": 167300
+ },
+ {
+ "epoch": 0.8634255318051192,
+ "grad_norm": 22007.857421875,
+ "learning_rate": 1.6716304634779144e-05,
+ "loss": 0.4054,
+ "step": 167350
+ },
+ {
+ "epoch": 0.863683501787732,
+ "grad_norm": 24888.619140625,
+ "learning_rate": 1.6689053162668226e-05,
+ "loss": 0.3983,
+ "step": 167400
+ },
+ {
+ "epoch": 0.8639414717703447,
+ "grad_norm": 23306.1640625,
+ "learning_rate": 1.6661819471295704e-05,
+ "loss": 0.3985,
+ "step": 167450
+ },
+ {
+ "epoch": 0.8641994417529576,
+ "grad_norm": 25983.62109375,
+ "learning_rate": 1.6634603575198387e-05,
+ "loss": 0.4033,
+ "step": 167500
+ },
+ {
+ "epoch": 0.8644574117355704,
+ "grad_norm": 21851.826171875,
+ "learning_rate": 1.6607405488903582e-05,
+ "loss": 0.4067,
+ "step": 167550
+ },
+ {
+ "epoch": 0.8647153817181833,
+ "grad_norm": 23041.548828125,
+ "learning_rate": 1.6580225226929152e-05,
+ "loss": 0.4054,
+ "step": 167600
+ },
+ {
+ "epoch": 0.8649733517007961,
+ "grad_norm": 24893.72265625,
+ "learning_rate": 1.655306280378333e-05,
+ "loss": 0.4081,
+ "step": 167650
+ },
+ {
+ "epoch": 0.8652313216834089,
+ "grad_norm": 24462.869140625,
+ "learning_rate": 1.6525918233964933e-05,
+ "loss": 0.4093,
+ "step": 167700
+ },
+ {
+ "epoch": 0.8654892916660217,
+ "grad_norm": 20188.037109375,
+ "learning_rate": 1.6498791531963197e-05,
+ "loss": 0.3986,
+ "step": 167750
+ },
+ {
+ "epoch": 0.8657472616486346,
+ "grad_norm": 24806.51171875,
+ "learning_rate": 1.6471682712257812e-05,
+ "loss": 0.3988,
+ "step": 167800
+ },
+ {
+ "epoch": 0.8660052316312474,
+ "grad_norm": 21647.11328125,
+ "learning_rate": 1.6444591789318992e-05,
+ "loss": 0.4083,
+ "step": 167850
+ },
+ {
+ "epoch": 0.8662632016138602,
+ "grad_norm": 22894.3515625,
+ "learning_rate": 1.6417518777607277e-05,
+ "loss": 0.4004,
+ "step": 167900
+ },
+ {
+ "epoch": 0.866521171596473,
+ "grad_norm": 23173.974609375,
+ "learning_rate": 1.6390463691573765e-05,
+ "loss": 0.409,
+ "step": 167950
+ },
+ {
+ "epoch": 0.8667791415790859,
+ "grad_norm": 24268.001953125,
+ "learning_rate": 1.6363426545659927e-05,
+ "loss": 0.4021,
+ "step": 168000
+ },
+ {
+ "epoch": 0.8670371115616987,
+ "grad_norm": 23466.482421875,
+ "learning_rate": 1.6336407354297667e-05,
+ "loss": 0.4067,
+ "step": 168050
+ },
+ {
+ "epoch": 0.8672950815443115,
+ "grad_norm": 22965.560546875,
+ "learning_rate": 1.6309406131909298e-05,
+ "loss": 0.4127,
+ "step": 168100
+ },
+ {
+ "epoch": 0.8675530515269243,
+ "grad_norm": 22818.5859375,
+ "learning_rate": 1.6282422892907563e-05,
+ "loss": 0.4107,
+ "step": 168150
+ },
+ {
+ "epoch": 0.8678110215095372,
+ "grad_norm": 23358.80859375,
+ "learning_rate": 1.6255457651695565e-05,
+ "loss": 0.3985,
+ "step": 168200
+ },
+ {
+ "epoch": 0.86806899149215,
+ "grad_norm": 24952.044921875,
+ "learning_rate": 1.6228510422666865e-05,
+ "loss": 0.4021,
+ "step": 168250
+ },
+ {
+ "epoch": 0.8683269614747628,
+ "grad_norm": 23554.359375,
+ "learning_rate": 1.6201581220205353e-05,
+ "loss": 0.4091,
+ "step": 168300
+ },
+ {
+ "epoch": 0.8685849314573756,
+ "grad_norm": 23862.92578125,
+ "learning_rate": 1.6174670058685316e-05,
+ "loss": 0.4009,
+ "step": 168350
+ },
+ {
+ "epoch": 0.8688429014399884,
+ "grad_norm": 23549.693359375,
+ "learning_rate": 1.6147776952471415e-05,
+ "loss": 0.4062,
+ "step": 168400
+ },
+ {
+ "epoch": 0.8691008714226013,
+ "grad_norm": 25237.26953125,
+ "learning_rate": 1.612090191591865e-05,
+ "loss": 0.4009,
+ "step": 168450
+ },
+ {
+ "epoch": 0.8693588414052141,
+ "grad_norm": 24368.298828125,
+ "learning_rate": 1.6094044963372444e-05,
+ "loss": 0.4052,
+ "step": 168500
+ },
+ {
+ "epoch": 0.8696168113878269,
+ "grad_norm": 24438.0,
+ "learning_rate": 1.6067206109168453e-05,
+ "loss": 0.4077,
+ "step": 168550
+ },
+ {
+ "epoch": 0.8698747813704397,
+ "grad_norm": 30002.744140625,
+ "learning_rate": 1.6040385367632786e-05,
+ "loss": 0.4029,
+ "step": 168600
+ },
+ {
+ "epoch": 0.8701327513530526,
+ "grad_norm": 24591.333984375,
+ "learning_rate": 1.6013582753081824e-05,
+ "loss": 0.4019,
+ "step": 168650
+ },
+ {
+ "epoch": 0.8703907213356654,
+ "grad_norm": 24005.166015625,
+ "learning_rate": 1.5986798279822263e-05,
+ "loss": 0.4046,
+ "step": 168700
+ },
+ {
+ "epoch": 0.8706486913182782,
+ "grad_norm": 22198.482421875,
+ "learning_rate": 1.5960031962151167e-05,
+ "loss": 0.4003,
+ "step": 168750
+ },
+ {
+ "epoch": 0.870906661300891,
+ "grad_norm": 23392.919921875,
+ "learning_rate": 1.5933283814355872e-05,
+ "loss": 0.4039,
+ "step": 168800
+ },
+ {
+ "epoch": 0.8711646312835039,
+ "grad_norm": 26185.88671875,
+ "learning_rate": 1.5906553850714003e-05,
+ "loss": 0.4044,
+ "step": 168850
+ },
+ {
+ "epoch": 0.8714226012661167,
+ "grad_norm": 34066.59765625,
+ "learning_rate": 1.5879842085493514e-05,
+ "loss": 0.4068,
+ "step": 168900
+ },
+ {
+ "epoch": 0.8716805712487296,
+ "grad_norm": 21913.802734375,
+ "learning_rate": 1.5853148532952616e-05,
+ "loss": 0.4083,
+ "step": 168950
+ },
+ {
+ "epoch": 0.8719385412313423,
+ "grad_norm": 22491.25390625,
+ "learning_rate": 1.5826473207339802e-05,
+ "loss": 0.4037,
+ "step": 169000
+ },
+ {
+ "epoch": 0.8721965112139551,
+ "grad_norm": 23891.447265625,
+ "learning_rate": 1.579981612289389e-05,
+ "loss": 0.4033,
+ "step": 169050
+ },
+ {
+ "epoch": 0.872454481196568,
+ "grad_norm": 24374.109375,
+ "learning_rate": 1.5773177293843855e-05,
+ "loss": 0.41,
+ "step": 169100
+ },
+ {
+ "epoch": 0.8727124511791808,
+ "grad_norm": 24323.197265625,
+ "learning_rate": 1.574655673440903e-05,
+ "loss": 0.3999,
+ "step": 169150
+ },
+ {
+ "epoch": 0.8729704211617936,
+ "grad_norm": 22040.76171875,
+ "learning_rate": 1.5719954458798943e-05,
+ "loss": 0.3997,
+ "step": 169200
+ },
+ {
+ "epoch": 0.8732283911444064,
+ "grad_norm": 32067.173828125,
+ "learning_rate": 1.5693370481213355e-05,
+ "loss": 0.4028,
+ "step": 169250
+ },
+ {
+ "epoch": 0.8734863611270193,
+ "grad_norm": 27840.97265625,
+ "learning_rate": 1.5666804815842322e-05,
+ "loss": 0.4082,
+ "step": 169300
+ },
+ {
+ "epoch": 0.8737443311096321,
+ "grad_norm": 23976.154296875,
+ "learning_rate": 1.5640257476866033e-05,
+ "loss": 0.4075,
+ "step": 169350
+ },
+ {
+ "epoch": 0.8740023010922449,
+ "grad_norm": 22856.724609375,
+ "learning_rate": 1.5613728478454976e-05,
+ "loss": 0.4033,
+ "step": 169400
+ },
+ {
+ "epoch": 0.8742602710748577,
+ "grad_norm": 22639.69140625,
+ "learning_rate": 1.5587217834769803e-05,
+ "loss": 0.4052,
+ "step": 169450
+ },
+ {
+ "epoch": 0.8745182410574706,
+ "grad_norm": 24272.626953125,
+ "learning_rate": 1.5560725559961386e-05,
+ "loss": 0.4029,
+ "step": 169500
+ },
+ {
+ "epoch": 0.8747762110400834,
+ "grad_norm": 23789.333984375,
+ "learning_rate": 1.553425166817079e-05,
+ "loss": 0.4078,
+ "step": 169550
+ },
+ {
+ "epoch": 0.8750341810226961,
+ "grad_norm": 23287.294921875,
+ "learning_rate": 1.5507796173529248e-05,
+ "loss": 0.408,
+ "step": 169600
+ },
+ {
+ "epoch": 0.875292151005309,
+ "grad_norm": 22272.13671875,
+ "learning_rate": 1.548135909015822e-05,
+ "loss": 0.4017,
+ "step": 169650
+ },
+ {
+ "epoch": 0.8755501209879218,
+ "grad_norm": 24645.40234375,
+ "learning_rate": 1.5454940432169297e-05,
+ "loss": 0.4001,
+ "step": 169700
+ },
+ {
+ "epoch": 0.8758080909705347,
+ "grad_norm": 26364.072265625,
+ "learning_rate": 1.5428540213664243e-05,
+ "loss": 0.411,
+ "step": 169750
+ },
+ {
+ "epoch": 0.8760660609531475,
+ "grad_norm": 24535.76171875,
+ "learning_rate": 1.5402158448734987e-05,
+ "loss": 0.4042,
+ "step": 169800
+ },
+ {
+ "epoch": 0.8763240309357603,
+ "grad_norm": 23294.94140625,
+ "learning_rate": 1.53757951514636e-05,
+ "loss": 0.4083,
+ "step": 169850
+ },
+ {
+ "epoch": 0.8765820009183731,
+ "grad_norm": 23390.046875,
+ "learning_rate": 1.5349450335922295e-05,
+ "loss": 0.399,
+ "step": 169900
+ },
+ {
+ "epoch": 0.876839970900986,
+ "grad_norm": 23079.41796875,
+ "learning_rate": 1.5323124016173455e-05,
+ "loss": 0.4078,
+ "step": 169950
+ },
+ {
+ "epoch": 0.8770979408835988,
+ "grad_norm": 24190.23046875,
+ "learning_rate": 1.529681620626951e-05,
+ "loss": 0.4013,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8770979408835988,
+ "eval_loss": 0.39030978083610535,
+ "eval_runtime": 3197.421,
+ "eval_samples_per_second": 969.882,
+ "eval_steps_per_second": 1.894,
+ "step": 170000
+ },
+ {
+ "epoch": 0.8773559108662116,
+ "grad_norm": 24830.658203125,
+ "learning_rate": 1.5270526920253098e-05,
+ "loss": 0.4053,
+ "step": 170050
+ },
+ {
+ "epoch": 0.8776138808488244,
+ "grad_norm": 21314.533203125,
+ "learning_rate": 1.5244256172156923e-05,
+ "loss": 0.4067,
+ "step": 170100
+ },
+ {
+ "epoch": 0.8778718508314373,
+ "grad_norm": 23271.314453125,
+ "learning_rate": 1.521800397600378e-05,
+ "loss": 0.4024,
+ "step": 170150
+ },
+ {
+ "epoch": 0.8781298208140501,
+ "grad_norm": 20112.265625,
+ "learning_rate": 1.5191770345806632e-05,
+ "loss": 0.4001,
+ "step": 170200
+ },
+ {
+ "epoch": 0.8783877907966628,
+ "grad_norm": 23957.087890625,
+ "learning_rate": 1.5165555295568418e-05,
+ "loss": 0.406,
+ "step": 170250
+ },
+ {
+ "epoch": 0.8786457607792757,
+ "grad_norm": 23699.181640625,
+ "learning_rate": 1.5139358839282275e-05,
+ "loss": 0.4005,
+ "step": 170300
+ },
+ {
+ "epoch": 0.8789037307618885,
+ "grad_norm": 23276.4453125,
+ "learning_rate": 1.5113180990931353e-05,
+ "loss": 0.4057,
+ "step": 170350
+ },
+ {
+ "epoch": 0.8791617007445014,
+ "grad_norm": 27051.26171875,
+ "learning_rate": 1.5087021764488867e-05,
+ "loss": 0.4037,
+ "step": 170400
+ },
+ {
+ "epoch": 0.8794196707271142,
+ "grad_norm": 24315.11328125,
+ "learning_rate": 1.5060881173918112e-05,
+ "loss": 0.4004,
+ "step": 170450
+ },
+ {
+ "epoch": 0.879677640709727,
+ "grad_norm": 22589.85546875,
+ "learning_rate": 1.5034759233172419e-05,
+ "loss": 0.402,
+ "step": 170500
+ },
+ {
+ "epoch": 0.8799356106923398,
+ "grad_norm": 24601.666015625,
+ "learning_rate": 1.5008655956195195e-05,
+ "loss": 0.4083,
+ "step": 170550
+ },
+ {
+ "epoch": 0.8801935806749527,
+ "grad_norm": 23203.884765625,
+ "learning_rate": 1.4982571356919862e-05,
+ "loss": 0.3971,
+ "step": 170600
+ },
+ {
+ "epoch": 0.8804515506575655,
+ "grad_norm": 28701.162109375,
+ "learning_rate": 1.4956505449269858e-05,
+ "loss": 0.3989,
+ "step": 170650
+ },
+ {
+ "epoch": 0.8807095206401783,
+ "grad_norm": 23548.541015625,
+ "learning_rate": 1.4930458247158668e-05,
+ "loss": 0.4014,
+ "step": 170700
+ },
+ {
+ "epoch": 0.8809674906227911,
+ "grad_norm": 26836.626953125,
+ "learning_rate": 1.4904429764489792e-05,
+ "loss": 0.3964,
+ "step": 170750
+ },
+ {
+ "epoch": 0.881225460605404,
+ "grad_norm": 23989.537109375,
+ "learning_rate": 1.4878420015156697e-05,
+ "loss": 0.4062,
+ "step": 170800
+ },
+ {
+ "epoch": 0.8814834305880168,
+ "grad_norm": 22008.498046875,
+ "learning_rate": 1.4852429013042945e-05,
+ "loss": 0.4034,
+ "step": 170850
+ },
+ {
+ "epoch": 0.8817414005706296,
+ "grad_norm": 22564.548828125,
+ "learning_rate": 1.4826456772021957e-05,
+ "loss": 0.3953,
+ "step": 170900
+ },
+ {
+ "epoch": 0.8819993705532424,
+ "grad_norm": 20611.005859375,
+ "learning_rate": 1.4800503305957264e-05,
+ "loss": 0.3993,
+ "step": 170950
+ },
+ {
+ "epoch": 0.8822573405358553,
+ "grad_norm": 23731.072265625,
+ "learning_rate": 1.4774568628702312e-05,
+ "loss": 0.4008,
+ "step": 171000
+ },
+ {
+ "epoch": 0.8825153105184681,
+ "grad_norm": 23515.265625,
+ "learning_rate": 1.4748652754100506e-05,
+ "loss": 0.4093,
+ "step": 171050
+ },
+ {
+ "epoch": 0.882773280501081,
+ "grad_norm": 20889.193359375,
+ "learning_rate": 1.4722755695985291e-05,
+ "loss": 0.4036,
+ "step": 171100
+ },
+ {
+ "epoch": 0.8830312504836937,
+ "grad_norm": 23561.208984375,
+ "learning_rate": 1.4696877468179954e-05,
+ "loss": 0.4009,
+ "step": 171150
+ },
+ {
+ "epoch": 0.8832892204663065,
+ "grad_norm": 29216.3046875,
+ "learning_rate": 1.4671018084497828e-05,
+ "loss": 0.4087,
+ "step": 171200
+ },
+ {
+ "epoch": 0.8835471904489194,
+ "grad_norm": 24697.615234375,
+ "learning_rate": 1.4645177558742147e-05,
+ "loss": 0.3976,
+ "step": 171250
+ },
+ {
+ "epoch": 0.8838051604315322,
+ "grad_norm": 30338.123046875,
+ "learning_rate": 1.4619355904706062e-05,
+ "loss": 0.4046,
+ "step": 171300
+ },
+ {
+ "epoch": 0.884063130414145,
+ "grad_norm": 22565.310546875,
+ "learning_rate": 1.4593553136172705e-05,
+ "loss": 0.4011,
+ "step": 171350
+ },
+ {
+ "epoch": 0.8843211003967578,
+ "grad_norm": 23498.0,
+ "learning_rate": 1.4567769266915077e-05,
+ "loss": 0.4071,
+ "step": 171400
+ },
+ {
+ "epoch": 0.8845790703793707,
+ "grad_norm": 23772.279296875,
+ "learning_rate": 1.4542004310696112e-05,
+ "loss": 0.4048,
+ "step": 171450
+ },
+ {
+ "epoch": 0.8848370403619835,
+ "grad_norm": 22418.015625,
+ "learning_rate": 1.4516258281268636e-05,
+ "loss": 0.4009,
+ "step": 171500
+ },
+ {
+ "epoch": 0.8850950103445963,
+ "grad_norm": 25706.166015625,
+ "learning_rate": 1.4490531192375395e-05,
+ "loss": 0.4017,
+ "step": 171550
+ },
+ {
+ "epoch": 0.8853529803272091,
+ "grad_norm": 23563.37890625,
+ "learning_rate": 1.4464823057748982e-05,
+ "loss": 0.4056,
+ "step": 171600
+ },
+ {
+ "epoch": 0.885610950309822,
+ "grad_norm": 23104.65234375,
+ "learning_rate": 1.4439133891111956e-05,
+ "loss": 0.4014,
+ "step": 171650
+ },
+ {
+ "epoch": 0.8858689202924348,
+ "grad_norm": 22858.935546875,
+ "learning_rate": 1.4413463706176627e-05,
+ "loss": 0.4047,
+ "step": 171700
+ },
+ {
+ "epoch": 0.8861268902750475,
+ "grad_norm": 23197.859375,
+ "learning_rate": 1.4387812516645299e-05,
+ "loss": 0.4032,
+ "step": 171750
+ },
+ {
+ "epoch": 0.8863848602576604,
+ "grad_norm": 22323.4609375,
+ "learning_rate": 1.4362180336210057e-05,
+ "loss": 0.4018,
+ "step": 171800
+ },
+ {
+ "epoch": 0.8866428302402732,
+ "grad_norm": 23677.431640625,
+ "learning_rate": 1.433656717855285e-05,
+ "loss": 0.4057,
+ "step": 171850
+ },
+ {
+ "epoch": 0.8869008002228861,
+ "grad_norm": 22975.283203125,
+ "learning_rate": 1.4310973057345538e-05,
+ "loss": 0.4077,
+ "step": 171900
+ },
+ {
+ "epoch": 0.8871587702054989,
+ "grad_norm": 23338.005859375,
+ "learning_rate": 1.4285397986249694e-05,
+ "loss": 0.4037,
+ "step": 171950
+ },
+ {
+ "epoch": 0.8874167401881117,
+ "grad_norm": 22469.08203125,
+ "learning_rate": 1.4259841978916849e-05,
+ "loss": 0.4025,
+ "step": 172000
+ },
+ {
+ "epoch": 0.8876747101707245,
+ "grad_norm": 23508.064453125,
+ "learning_rate": 1.4234305048988288e-05,
+ "loss": 0.3979,
+ "step": 172050
+ },
+ {
+ "epoch": 0.8879326801533374,
+ "grad_norm": 25113.62890625,
+ "learning_rate": 1.4208787210095126e-05,
+ "loss": 0.3988,
+ "step": 172100
+ },
+ {
+ "epoch": 0.8881906501359502,
+ "grad_norm": 23230.75,
+ "learning_rate": 1.4183288475858298e-05,
+ "loss": 0.4029,
+ "step": 172150
+ },
+ {
+ "epoch": 0.888448620118563,
+ "grad_norm": 22058.306640625,
+ "learning_rate": 1.4157808859888516e-05,
+ "loss": 0.4082,
+ "step": 172200
+ },
+ {
+ "epoch": 0.8887065901011758,
+ "grad_norm": 23375.91015625,
+ "learning_rate": 1.4132348375786336e-05,
+ "loss": 0.407,
+ "step": 172250
+ },
+ {
+ "epoch": 0.8889645600837887,
+ "grad_norm": 21199.943359375,
+ "learning_rate": 1.4106907037142059e-05,
+ "loss": 0.4039,
+ "step": 172300
+ },
+ {
+ "epoch": 0.8892225300664015,
+ "grad_norm": 22754.287109375,
+ "learning_rate": 1.4081484857535777e-05,
+ "loss": 0.4,
+ "step": 172350
+ },
+ {
+ "epoch": 0.8894805000490142,
+ "grad_norm": 23116.21484375,
+ "learning_rate": 1.405608185053735e-05,
+ "loss": 0.4026,
+ "step": 172400
+ },
+ {
+ "epoch": 0.8897384700316271,
+ "grad_norm": 22281.65625,
+ "learning_rate": 1.4030698029706423e-05,
+ "loss": 0.3992,
+ "step": 172450
+ },
+ {
+ "epoch": 0.8899964400142399,
+ "grad_norm": 22979.447265625,
+ "learning_rate": 1.400533340859237e-05,
+ "loss": 0.4027,
+ "step": 172500
+ },
+ {
+ "epoch": 0.8902544099968528,
+ "grad_norm": 25733.873046875,
+ "learning_rate": 1.3979988000734373e-05,
+ "loss": 0.4092,
+ "step": 172550
+ },
+ {
+ "epoch": 0.8905123799794656,
+ "grad_norm": 23825.38671875,
+ "learning_rate": 1.395466181966127e-05,
+ "loss": 0.3997,
+ "step": 172600
+ },
+ {
+ "epoch": 0.8907703499620784,
+ "grad_norm": 27504.0703125,
+ "learning_rate": 1.3929354878891715e-05,
+ "loss": 0.403,
+ "step": 172650
+ },
+ {
+ "epoch": 0.8910283199446912,
+ "grad_norm": 28201.208984375,
+ "learning_rate": 1.3904067191934067e-05,
+ "loss": 0.4029,
+ "step": 172700
+ },
+ {
+ "epoch": 0.8912862899273041,
+ "grad_norm": 24115.69140625,
+ "learning_rate": 1.3878798772286377e-05,
+ "loss": 0.3979,
+ "step": 172750
+ },
+ {
+ "epoch": 0.8915442599099169,
+ "grad_norm": 20489.552734375,
+ "learning_rate": 1.3853549633436491e-05,
+ "loss": 0.4001,
+ "step": 172800
+ },
+ {
+ "epoch": 0.8918022298925297,
+ "grad_norm": 23580.583984375,
+ "learning_rate": 1.3828319788861838e-05,
+ "loss": 0.3983,
+ "step": 172850
+ },
+ {
+ "epoch": 0.8920601998751425,
+ "grad_norm": 24172.771484375,
+ "learning_rate": 1.3803109252029678e-05,
+ "loss": 0.4081,
+ "step": 172900
+ },
+ {
+ "epoch": 0.8923181698577554,
+ "grad_norm": 26543.375,
+ "learning_rate": 1.3777918036396887e-05,
+ "loss": 0.4015,
+ "step": 172950
+ },
+ {
+ "epoch": 0.8925761398403682,
+ "grad_norm": 27849.654296875,
+ "learning_rate": 1.3752746155410046e-05,
+ "loss": 0.4045,
+ "step": 173000
+ },
+ {
+ "epoch": 0.892834109822981,
+ "grad_norm": 25752.724609375,
+ "learning_rate": 1.3727593622505424e-05,
+ "loss": 0.4022,
+ "step": 173050
+ },
+ {
+ "epoch": 0.8930920798055938,
+ "grad_norm": 22836.892578125,
+ "learning_rate": 1.3702460451108935e-05,
+ "loss": 0.4015,
+ "step": 173100
+ },
+ {
+ "epoch": 0.8933500497882066,
+ "grad_norm": 26556.62890625,
+ "learning_rate": 1.3677346654636208e-05,
+ "loss": 0.4017,
+ "step": 173150
+ },
+ {
+ "epoch": 0.8936080197708195,
+ "grad_norm": 24310.390625,
+ "learning_rate": 1.3652252246492492e-05,
+ "loss": 0.4015,
+ "step": 173200
+ },
+ {
+ "epoch": 0.8938659897534323,
+ "grad_norm": 23713.0859375,
+ "learning_rate": 1.3627177240072698e-05,
+ "loss": 0.4024,
+ "step": 173250
+ },
+ {
+ "epoch": 0.8941239597360451,
+ "grad_norm": 21189.57421875,
+ "learning_rate": 1.3602121648761373e-05,
+ "loss": 0.4012,
+ "step": 173300
+ },
+ {
+ "epoch": 0.8943819297186579,
+ "grad_norm": 24229.1484375,
+ "learning_rate": 1.3577085485932705e-05,
+ "loss": 0.4105,
+ "step": 173350
+ },
+ {
+ "epoch": 0.8946398997012708,
+ "grad_norm": 23998.22265625,
+ "learning_rate": 1.3552068764950504e-05,
+ "loss": 0.4004,
+ "step": 173400
+ },
+ {
+ "epoch": 0.8948978696838836,
+ "grad_norm": 24751.1171875,
+ "learning_rate": 1.3527071499168253e-05,
+ "loss": 0.4024,
+ "step": 173450
+ },
+ {
+ "epoch": 0.8951558396664964,
+ "grad_norm": 24872.029296875,
+ "learning_rate": 1.3502093701928948e-05,
+ "loss": 0.406,
+ "step": 173500
+ },
+ {
+ "epoch": 0.8954138096491092,
+ "grad_norm": 23180.771484375,
+ "learning_rate": 1.3477135386565297e-05,
+ "loss": 0.4041,
+ "step": 173550
+ },
+ {
+ "epoch": 0.8956717796317221,
+ "grad_norm": 23679.1484375,
+ "learning_rate": 1.3452196566399555e-05,
+ "loss": 0.4095,
+ "step": 173600
+ },
+ {
+ "epoch": 0.8959297496143349,
+ "grad_norm": 26730.537109375,
+ "learning_rate": 1.3427277254743565e-05,
+ "loss": 0.4058,
+ "step": 173650
+ },
+ {
+ "epoch": 0.8961877195969477,
+ "grad_norm": 23320.666015625,
+ "learning_rate": 1.3402377464898813e-05,
+ "loss": 0.4038,
+ "step": 173700
+ },
+ {
+ "epoch": 0.8964456895795605,
+ "grad_norm": 22802.87890625,
+ "learning_rate": 1.3377497210156276e-05,
+ "loss": 0.3977,
+ "step": 173750
+ },
+ {
+ "epoch": 0.8967036595621733,
+ "grad_norm": 21257.22265625,
+ "learning_rate": 1.3352636503796584e-05,
+ "loss": 0.4074,
+ "step": 173800
+ },
+ {
+ "epoch": 0.8969616295447862,
+ "grad_norm": 23935.412109375,
+ "learning_rate": 1.332779535908989e-05,
+ "loss": 0.4021,
+ "step": 173850
+ },
+ {
+ "epoch": 0.8972195995273989,
+ "grad_norm": 21819.267578125,
+ "learning_rate": 1.3302973789295925e-05,
+ "loss": 0.3992,
+ "step": 173900
+ },
+ {
+ "epoch": 0.8974775695100118,
+ "grad_norm": 23360.71875,
+ "learning_rate": 1.327817180766393e-05,
+ "loss": 0.4051,
+ "step": 173950
+ },
+ {
+ "epoch": 0.8977355394926246,
+ "grad_norm": 24474.685546875,
+ "learning_rate": 1.3253389427432772e-05,
+ "loss": 0.4046,
+ "step": 174000
+ },
+ {
+ "epoch": 0.8979935094752375,
+ "grad_norm": 29715.3359375,
+ "learning_rate": 1.3228626661830779e-05,
+ "loss": 0.4037,
+ "step": 174050
+ },
+ {
+ "epoch": 0.8982514794578503,
+ "grad_norm": 23241.20703125,
+ "learning_rate": 1.3203883524075833e-05,
+ "loss": 0.4003,
+ "step": 174100
+ },
+ {
+ "epoch": 0.8985094494404631,
+ "grad_norm": 26005.23828125,
+ "learning_rate": 1.3179160027375347e-05,
+ "loss": 0.3992,
+ "step": 174150
+ },
+ {
+ "epoch": 0.8987674194230759,
+ "grad_norm": 23322.212890625,
+ "learning_rate": 1.3154456184926234e-05,
+ "loss": 0.4037,
+ "step": 174200
+ },
+ {
+ "epoch": 0.8990253894056888,
+ "grad_norm": 22434.90234375,
+ "learning_rate": 1.3129772009914964e-05,
+ "loss": 0.4044,
+ "step": 174250
+ },
+ {
+ "epoch": 0.8992833593883016,
+ "grad_norm": 24753.904296875,
+ "learning_rate": 1.3105107515517418e-05,
+ "loss": 0.4034,
+ "step": 174300
+ },
+ {
+ "epoch": 0.8995413293709144,
+ "grad_norm": 23271.814453125,
+ "learning_rate": 1.3080462714899066e-05,
+ "loss": 0.3992,
+ "step": 174350
+ },
+ {
+ "epoch": 0.8997992993535272,
+ "grad_norm": 23929.7578125,
+ "learning_rate": 1.3055837621214811e-05,
+ "loss": 0.4018,
+ "step": 174400
+ },
+ {
+ "epoch": 0.90005726933614,
+ "grad_norm": 25211.7265625,
+ "learning_rate": 1.3031232247609037e-05,
+ "loss": 0.4052,
+ "step": 174450
+ },
+ {
+ "epoch": 0.9003152393187529,
+ "grad_norm": 24554.791015625,
+ "learning_rate": 1.300664660721566e-05,
+ "loss": 0.3987,
+ "step": 174500
+ },
+ {
+ "epoch": 0.9005732093013656,
+ "grad_norm": 26028.396484375,
+ "learning_rate": 1.2982080713157963e-05,
+ "loss": 0.4032,
+ "step": 174550
+ },
+ {
+ "epoch": 0.9008311792839785,
+ "grad_norm": 24228.72265625,
+ "learning_rate": 1.295753457854878e-05,
+ "loss": 0.4001,
+ "step": 174600
+ },
+ {
+ "epoch": 0.9010891492665913,
+ "grad_norm": 24043.064453125,
+ "learning_rate": 1.293300821649036e-05,
+ "loss": 0.4009,
+ "step": 174650
+ },
+ {
+ "epoch": 0.9013471192492042,
+ "grad_norm": 25628.208984375,
+ "learning_rate": 1.2908501640074388e-05,
+ "loss": 0.4058,
+ "step": 174700
+ },
+ {
+ "epoch": 0.901605089231817,
+ "grad_norm": 23927.81640625,
+ "learning_rate": 1.288401486238201e-05,
+ "loss": 0.4044,
+ "step": 174750
+ },
+ {
+ "epoch": 0.9018630592144298,
+ "grad_norm": 23615.923828125,
+ "learning_rate": 1.2859547896483793e-05,
+ "loss": 0.4042,
+ "step": 174800
+ },
+ {
+ "epoch": 0.9021210291970426,
+ "grad_norm": 24990.158203125,
+ "learning_rate": 1.2835100755439705e-05,
+ "loss": 0.4033,
+ "step": 174850
+ },
+ {
+ "epoch": 0.9023789991796555,
+ "grad_norm": 23908.240234375,
+ "learning_rate": 1.2810673452299194e-05,
+ "loss": 0.404,
+ "step": 174900
+ },
+ {
+ "epoch": 0.9026369691622683,
+ "grad_norm": 24776.828125,
+ "learning_rate": 1.278626600010106e-05,
+ "loss": 0.4017,
+ "step": 174950
+ },
+ {
+ "epoch": 0.9028949391448811,
+ "grad_norm": 23400.912109375,
+ "learning_rate": 1.276187841187354e-05,
+ "loss": 0.4007,
+ "step": 175000
+ },
+ {
+ "epoch": 0.9028949391448811,
+ "eval_loss": 0.389443963766098,
+ "eval_runtime": 3184.6844,
+ "eval_samples_per_second": 973.761,
+ "eval_steps_per_second": 1.902,
+ "step": 175000
+ },
+ {
+ "epoch": 0.9031529091274939,
+ "grad_norm": 23482.337890625,
+ "learning_rate": 1.2737510700634248e-05,
+ "loss": 0.4033,
+ "step": 175050
+ },
+ {
+ "epoch": 0.9034108791101068,
+ "grad_norm": 24351.23828125,
+ "learning_rate": 1.2713162879390183e-05,
+ "loss": 0.4031,
+ "step": 175100
+ },
+ {
+ "epoch": 0.9036688490927196,
+ "grad_norm": 28495.6796875,
+ "learning_rate": 1.2688834961137785e-05,
+ "loss": 0.4057,
+ "step": 175150
+ },
+ {
+ "epoch": 0.9039268190753323,
+ "grad_norm": 23276.583984375,
+ "learning_rate": 1.2664526958862765e-05,
+ "loss": 0.4036,
+ "step": 175200
+ },
+ {
+ "epoch": 0.9041847890579452,
+ "grad_norm": 22784.033203125,
+ "learning_rate": 1.2640238885540312e-05,
+ "loss": 0.4054,
+ "step": 175250
+ },
+ {
+ "epoch": 0.904442759040558,
+ "grad_norm": 22389.21484375,
+ "learning_rate": 1.2615970754134914e-05,
+ "loss": 0.4036,
+ "step": 175300
+ },
+ {
+ "epoch": 0.9047007290231709,
+ "grad_norm": 24767.59375,
+ "learning_rate": 1.2591722577600412e-05,
+ "loss": 0.4055,
+ "step": 175350
+ },
+ {
+ "epoch": 0.9049586990057837,
+ "grad_norm": 24981.552734375,
+ "learning_rate": 1.2567494368880056e-05,
+ "loss": 0.3997,
+ "step": 175400
+ },
+ {
+ "epoch": 0.9052166689883965,
+ "grad_norm": 24523.580078125,
+ "learning_rate": 1.254328614090634e-05,
+ "loss": 0.4009,
+ "step": 175450
+ },
+ {
+ "epoch": 0.9054746389710093,
+ "grad_norm": 29571.404296875,
+ "learning_rate": 1.251909790660119e-05,
+ "loss": 0.4013,
+ "step": 175500
+ },
+ {
+ "epoch": 0.9057326089536222,
+ "grad_norm": 23286.564453125,
+ "learning_rate": 1.24949296788758e-05,
+ "loss": 0.3997,
+ "step": 175550
+ },
+ {
+ "epoch": 0.905990578936235,
+ "grad_norm": 23124.205078125,
+ "learning_rate": 1.247078147063071e-05,
+ "loss": 0.4056,
+ "step": 175600
+ },
+ {
+ "epoch": 0.9062485489188478,
+ "grad_norm": 23467.775390625,
+ "learning_rate": 1.2446653294755755e-05,
+ "loss": 0.3976,
+ "step": 175650
+ },
+ {
+ "epoch": 0.9065065189014606,
+ "grad_norm": 23793.609375,
+ "learning_rate": 1.2422545164130096e-05,
+ "loss": 0.4018,
+ "step": 175700
+ },
+ {
+ "epoch": 0.9067644888840735,
+ "grad_norm": 24439.974609375,
+ "learning_rate": 1.2398457091622167e-05,
+ "loss": 0.4063,
+ "step": 175750
+ },
+ {
+ "epoch": 0.9070224588666863,
+ "grad_norm": 23925.22265625,
+ "learning_rate": 1.2374389090089744e-05,
+ "loss": 0.4039,
+ "step": 175800
+ },
+ {
+ "epoch": 0.907280428849299,
+ "grad_norm": 23174.416015625,
+ "learning_rate": 1.2350341172379853e-05,
+ "loss": 0.4031,
+ "step": 175850
+ },
+ {
+ "epoch": 0.9075383988319119,
+ "grad_norm": 26669.806640625,
+ "learning_rate": 1.2326313351328794e-05,
+ "loss": 0.4031,
+ "step": 175900
+ },
+ {
+ "epoch": 0.9077963688145247,
+ "grad_norm": 21128.041015625,
+ "learning_rate": 1.2302305639762168e-05,
+ "loss": 0.407,
+ "step": 175950
+ },
+ {
+ "epoch": 0.9080543387971376,
+ "grad_norm": 22798.111328125,
+ "learning_rate": 1.2278318050494797e-05,
+ "loss": 0.4035,
+ "step": 176000
+ },
+ {
+ "epoch": 0.9083123087797504,
+ "grad_norm": 23327.587890625,
+ "learning_rate": 1.2254350596330843e-05,
+ "loss": 0.3958,
+ "step": 176050
+ },
+ {
+ "epoch": 0.9085702787623632,
+ "grad_norm": 22225.3125,
+ "learning_rate": 1.2230403290063613e-05,
+ "loss": 0.4074,
+ "step": 176100
+ },
+ {
+ "epoch": 0.908828248744976,
+ "grad_norm": 22727.791015625,
+ "learning_rate": 1.2206476144475754e-05,
+ "loss": 0.4063,
+ "step": 176150
+ },
+ {
+ "epoch": 0.9090862187275889,
+ "grad_norm": 26138.931640625,
+ "learning_rate": 1.2182569172339098e-05,
+ "loss": 0.408,
+ "step": 176200
+ },
+ {
+ "epoch": 0.9093441887102017,
+ "grad_norm": 23436.91796875,
+ "learning_rate": 1.2158682386414716e-05,
+ "loss": 0.4038,
+ "step": 176250
+ },
+ {
+ "epoch": 0.9096021586928145,
+ "grad_norm": 23695.244140625,
+ "learning_rate": 1.2134815799452947e-05,
+ "loss": 0.4074,
+ "step": 176300
+ },
+ {
+ "epoch": 0.9098601286754273,
+ "grad_norm": 25616.240234375,
+ "learning_rate": 1.2110969424193263e-05,
+ "loss": 0.3971,
+ "step": 176350
+ },
+ {
+ "epoch": 0.9101180986580402,
+ "grad_norm": 27326.634765625,
+ "learning_rate": 1.2087143273364431e-05,
+ "loss": 0.4045,
+ "step": 176400
+ },
+ {
+ "epoch": 0.910376068640653,
+ "grad_norm": 23704.775390625,
+ "learning_rate": 1.2063337359684384e-05,
+ "loss": 0.4071,
+ "step": 176450
+ },
+ {
+ "epoch": 0.9106340386232658,
+ "grad_norm": 25532.234375,
+ "learning_rate": 1.2039551695860251e-05,
+ "loss": 0.4021,
+ "step": 176500
+ },
+ {
+ "epoch": 0.9108920086058786,
+ "grad_norm": 25247.884765625,
+ "learning_rate": 1.201578629458835e-05,
+ "loss": 0.4074,
+ "step": 176550
+ },
+ {
+ "epoch": 0.9111499785884914,
+ "grad_norm": 29377.486328125,
+ "learning_rate": 1.1992041168554236e-05,
+ "loss": 0.4064,
+ "step": 176600
+ },
+ {
+ "epoch": 0.9114079485711043,
+ "grad_norm": 22188.34375,
+ "learning_rate": 1.1968316330432527e-05,
+ "loss": 0.404,
+ "step": 176650
+ },
+ {
+ "epoch": 0.911665918553717,
+ "grad_norm": 23766.0546875,
+ "learning_rate": 1.194461179288714e-05,
+ "loss": 0.4016,
+ "step": 176700
+ },
+ {
+ "epoch": 0.9119238885363299,
+ "grad_norm": 21386.623046875,
+ "learning_rate": 1.1920927568571078e-05,
+ "loss": 0.4055,
+ "step": 176750
+ },
+ {
+ "epoch": 0.9121818585189427,
+ "grad_norm": 25873.052734375,
+ "learning_rate": 1.1897263670126507e-05,
+ "loss": 0.3978,
+ "step": 176800
+ },
+ {
+ "epoch": 0.9124398285015556,
+ "grad_norm": 25235.5390625,
+ "learning_rate": 1.1873620110184803e-05,
+ "loss": 0.3975,
+ "step": 176850
+ },
+ {
+ "epoch": 0.9126977984841684,
+ "grad_norm": 22841.5,
+ "learning_rate": 1.1849996901366383e-05,
+ "loss": 0.4031,
+ "step": 176900
+ },
+ {
+ "epoch": 0.9129557684667812,
+ "grad_norm": 21522.388671875,
+ "learning_rate": 1.1826394056280893e-05,
+ "loss": 0.4048,
+ "step": 176950
+ },
+ {
+ "epoch": 0.913213738449394,
+ "grad_norm": 27600.689453125,
+ "learning_rate": 1.1802811587527074e-05,
+ "loss": 0.3984,
+ "step": 177000
+ },
+ {
+ "epoch": 0.9134717084320069,
+ "grad_norm": 24698.60546875,
+ "learning_rate": 1.177924950769278e-05,
+ "loss": 0.406,
+ "step": 177050
+ },
+ {
+ "epoch": 0.9137296784146197,
+ "grad_norm": 27378.033203125,
+ "learning_rate": 1.1755707829355001e-05,
+ "loss": 0.3993,
+ "step": 177100
+ },
+ {
+ "epoch": 0.9139876483972325,
+ "grad_norm": 27578.4296875,
+ "learning_rate": 1.1732186565079805e-05,
+ "loss": 0.3984,
+ "step": 177150
+ },
+ {
+ "epoch": 0.9142456183798453,
+ "grad_norm": 24650.6953125,
+ "learning_rate": 1.1708685727422424e-05,
+ "loss": 0.401,
+ "step": 177200
+ },
+ {
+ "epoch": 0.9145035883624582,
+ "grad_norm": 25550.0859375,
+ "learning_rate": 1.1685205328927135e-05,
+ "loss": 0.399,
+ "step": 177250
+ },
+ {
+ "epoch": 0.914761558345071,
+ "grad_norm": 22760.77734375,
+ "learning_rate": 1.166174538212732e-05,
+ "loss": 0.403,
+ "step": 177300
+ },
+ {
+ "epoch": 0.9150195283276837,
+ "grad_norm": 22038.26171875,
+ "learning_rate": 1.1638305899545443e-05,
+ "loss": 0.4066,
+ "step": 177350
+ },
+ {
+ "epoch": 0.9152774983102966,
+ "grad_norm": 23857.66015625,
+ "learning_rate": 1.1614886893693044e-05,
+ "loss": 0.4038,
+ "step": 177400
+ },
+ {
+ "epoch": 0.9155354682929094,
+ "grad_norm": 24813.55859375,
+ "learning_rate": 1.1591488377070724e-05,
+ "loss": 0.3992,
+ "step": 177450
+ },
+ {
+ "epoch": 0.9157934382755223,
+ "grad_norm": 24467.5859375,
+ "learning_rate": 1.1568110362168199e-05,
+ "loss": 0.4,
+ "step": 177500
+ },
+ {
+ "epoch": 0.9160514082581351,
+ "grad_norm": 22464.98046875,
+ "learning_rate": 1.1544752861464143e-05,
+ "loss": 0.4069,
+ "step": 177550
+ },
+ {
+ "epoch": 0.9163093782407479,
+ "grad_norm": 26591.51171875,
+ "learning_rate": 1.1521415887426379e-05,
+ "loss": 0.4008,
+ "step": 177600
+ },
+ {
+ "epoch": 0.9165673482233607,
+ "grad_norm": 21086.318359375,
+ "learning_rate": 1.1498099452511724e-05,
+ "loss": 0.4036,
+ "step": 177650
+ },
+ {
+ "epoch": 0.9168253182059736,
+ "grad_norm": 24243.072265625,
+ "learning_rate": 1.147480356916602e-05,
+ "loss": 0.4019,
+ "step": 177700
+ },
+ {
+ "epoch": 0.9170832881885864,
+ "grad_norm": 26714.83984375,
+ "learning_rate": 1.1451528249824206e-05,
+ "loss": 0.3978,
+ "step": 177750
+ },
+ {
+ "epoch": 0.9173412581711992,
+ "grad_norm": 24799.712890625,
+ "learning_rate": 1.1428273506910132e-05,
+ "loss": 0.4078,
+ "step": 177800
+ },
+ {
+ "epoch": 0.917599228153812,
+ "grad_norm": 25010.435546875,
+ "learning_rate": 1.1405039352836777e-05,
+ "loss": 0.4054,
+ "step": 177850
+ },
+ {
+ "epoch": 0.9178571981364249,
+ "grad_norm": 23657.78125,
+ "learning_rate": 1.1381825800006068e-05,
+ "loss": 0.4001,
+ "step": 177900
+ },
+ {
+ "epoch": 0.9181151681190377,
+ "grad_norm": 23865.349609375,
+ "learning_rate": 1.1358632860808955e-05,
+ "loss": 0.4012,
+ "step": 177950
+ },
+ {
+ "epoch": 0.9183731381016504,
+ "grad_norm": 26476.04296875,
+ "learning_rate": 1.1335460547625365e-05,
+ "loss": 0.3998,
+ "step": 178000
+ },
+ {
+ "epoch": 0.9186311080842633,
+ "grad_norm": 24907.89453125,
+ "learning_rate": 1.1312308872824235e-05,
+ "loss": 0.401,
+ "step": 178050
+ },
+ {
+ "epoch": 0.9188890780668761,
+ "grad_norm": 24008.54296875,
+ "learning_rate": 1.1289177848763494e-05,
+ "loss": 0.3991,
+ "step": 178100
+ },
+ {
+ "epoch": 0.919147048049489,
+ "grad_norm": 23814.396484375,
+ "learning_rate": 1.1266067487790027e-05,
+ "loss": 0.4039,
+ "step": 178150
+ },
+ {
+ "epoch": 0.9194050180321018,
+ "grad_norm": 25892.994140625,
+ "learning_rate": 1.1242977802239696e-05,
+ "loss": 0.4015,
+ "step": 178200
+ },
+ {
+ "epoch": 0.9196629880147146,
+ "grad_norm": 24185.7265625,
+ "learning_rate": 1.1219908804437328e-05,
+ "loss": 0.3992,
+ "step": 178250
+ },
+ {
+ "epoch": 0.9199209579973274,
+ "grad_norm": 23890.54296875,
+ "learning_rate": 1.1196860506696705e-05,
+ "loss": 0.4087,
+ "step": 178300
+ },
+ {
+ "epoch": 0.9201789279799403,
+ "grad_norm": 25288.83203125,
+ "learning_rate": 1.1173832921320554e-05,
+ "loss": 0.4038,
+ "step": 178350
+ },
+ {
+ "epoch": 0.9204368979625531,
+ "grad_norm": 27609.994140625,
+ "learning_rate": 1.1150826060600594e-05,
+ "loss": 0.4047,
+ "step": 178400
+ },
+ {
+ "epoch": 0.9206948679451659,
+ "grad_norm": 25010.259765625,
+ "learning_rate": 1.112783993681738e-05,
+ "loss": 0.4037,
+ "step": 178450
+ },
+ {
+ "epoch": 0.9209528379277787,
+ "grad_norm": 23663.78515625,
+ "learning_rate": 1.1104874562240514e-05,
+ "loss": 0.396,
+ "step": 178500
+ },
+ {
+ "epoch": 0.9212108079103916,
+ "grad_norm": 24960.072265625,
+ "learning_rate": 1.108192994912844e-05,
+ "loss": 0.4024,
+ "step": 178550
+ },
+ {
+ "epoch": 0.9214687778930044,
+ "grad_norm": 22778.66796875,
+ "learning_rate": 1.1059006109728543e-05,
+ "loss": 0.4039,
+ "step": 178600
+ },
+ {
+ "epoch": 0.9217267478756171,
+ "grad_norm": 20177.640625,
+ "learning_rate": 1.1036103056277165e-05,
+ "loss": 0.4008,
+ "step": 178650
+ },
+ {
+ "epoch": 0.92198471785823,
+ "grad_norm": 25084.703125,
+ "learning_rate": 1.1013220800999452e-05,
+ "loss": 0.4082,
+ "step": 178700
+ },
+ {
+ "epoch": 0.9222426878408428,
+ "grad_norm": 23697.529296875,
+ "learning_rate": 1.0990359356109558e-05,
+ "loss": 0.4083,
+ "step": 178750
+ },
+ {
+ "epoch": 0.9225006578234557,
+ "grad_norm": 26252.25,
+ "learning_rate": 1.0967518733810462e-05,
+ "loss": 0.4114,
+ "step": 178800
+ },
+ {
+ "epoch": 0.9227586278060684,
+ "grad_norm": 25295.103515625,
+ "learning_rate": 1.094469894629403e-05,
+ "loss": 0.4062,
+ "step": 178850
+ },
+ {
+ "epoch": 0.9230165977886813,
+ "grad_norm": 24484.203125,
+ "learning_rate": 1.0921900005741053e-05,
+ "loss": 0.4008,
+ "step": 178900
+ },
+ {
+ "epoch": 0.9232745677712941,
+ "grad_norm": 23360.701171875,
+ "learning_rate": 1.0899121924321154e-05,
+ "loss": 0.405,
+ "step": 178950
+ },
+ {
+ "epoch": 0.923532537753907,
+ "grad_norm": 22507.24609375,
+ "learning_rate": 1.0876364714192822e-05,
+ "loss": 0.3968,
+ "step": 179000
+ },
+ {
+ "epoch": 0.9237905077365198,
+ "grad_norm": 26761.66015625,
+ "learning_rate": 1.0853628387503423e-05,
+ "loss": 0.4021,
+ "step": 179050
+ },
+ {
+ "epoch": 0.9240484777191326,
+ "grad_norm": 26596.376953125,
+ "learning_rate": 1.0830912956389166e-05,
+ "loss": 0.3984,
+ "step": 179100
+ },
+ {
+ "epoch": 0.9243064477017454,
+ "grad_norm": 23996.490234375,
+ "learning_rate": 1.0808218432975093e-05,
+ "loss": 0.3996,
+ "step": 179150
+ },
+ {
+ "epoch": 0.9245644176843583,
+ "grad_norm": 22681.4609375,
+ "learning_rate": 1.0785544829375143e-05,
+ "loss": 0.4021,
+ "step": 179200
+ },
+ {
+ "epoch": 0.9248223876669711,
+ "grad_norm": 25675.728515625,
+ "learning_rate": 1.0762892157691995e-05,
+ "loss": 0.3942,
+ "step": 179250
+ },
+ {
+ "epoch": 0.9250803576495839,
+ "grad_norm": 26039.25,
+ "learning_rate": 1.0740260430017247e-05,
+ "loss": 0.4014,
+ "step": 179300
+ },
+ {
+ "epoch": 0.9253383276321967,
+ "grad_norm": 21596.50390625,
+ "learning_rate": 1.0717649658431256e-05,
+ "loss": 0.4017,
+ "step": 179350
+ },
+ {
+ "epoch": 0.9255962976148095,
+ "grad_norm": 25318.3125,
+ "learning_rate": 1.0695059855003204e-05,
+ "loss": 0.3968,
+ "step": 179400
+ },
+ {
+ "epoch": 0.9258542675974224,
+ "grad_norm": 20999.10546875,
+ "learning_rate": 1.0672491031791137e-05,
+ "loss": 0.4032,
+ "step": 179450
+ },
+ {
+ "epoch": 0.9261122375800351,
+ "grad_norm": 25034.404296875,
+ "learning_rate": 1.0649943200841794e-05,
+ "loss": 0.3987,
+ "step": 179500
+ },
+ {
+ "epoch": 0.926370207562648,
+ "grad_norm": 23470.205078125,
+ "learning_rate": 1.0627416374190819e-05,
+ "loss": 0.4009,
+ "step": 179550
+ },
+ {
+ "epoch": 0.9266281775452608,
+ "grad_norm": 23667.298828125,
+ "learning_rate": 1.0604910563862575e-05,
+ "loss": 0.4022,
+ "step": 179600
+ },
+ {
+ "epoch": 0.9268861475278737,
+ "grad_norm": 25315.5390625,
+ "learning_rate": 1.058242578187023e-05,
+ "loss": 0.4023,
+ "step": 179650
+ },
+ {
+ "epoch": 0.9271441175104865,
+ "grad_norm": 23639.34375,
+ "learning_rate": 1.0559962040215727e-05,
+ "loss": 0.407,
+ "step": 179700
+ },
+ {
+ "epoch": 0.9274020874930993,
+ "grad_norm": 29350.244140625,
+ "learning_rate": 1.0537519350889764e-05,
+ "loss": 0.4063,
+ "step": 179750
+ },
+ {
+ "epoch": 0.9276600574757121,
+ "grad_norm": 26077.30859375,
+ "learning_rate": 1.051509772587183e-05,
+ "loss": 0.4011,
+ "step": 179800
+ },
+ {
+ "epoch": 0.927918027458325,
+ "grad_norm": 22387.8046875,
+ "learning_rate": 1.0492697177130157e-05,
+ "loss": 0.398,
+ "step": 179850
+ },
+ {
+ "epoch": 0.9281759974409378,
+ "grad_norm": 24023.2734375,
+ "learning_rate": 1.0470317716621719e-05,
+ "loss": 0.4026,
+ "step": 179900
+ },
+ {
+ "epoch": 0.9284339674235506,
+ "grad_norm": 24288.666015625,
+ "learning_rate": 1.044795935629223e-05,
+ "loss": 0.403,
+ "step": 179950
+ },
+ {
+ "epoch": 0.9286919374061634,
+ "grad_norm": 26163.923828125,
+ "learning_rate": 1.042562210807616e-05,
+ "loss": 0.4001,
+ "step": 180000
+ },
+ {
+ "epoch": 0.9286919374061634,
+ "eval_loss": 0.3886363208293915,
+ "eval_runtime": 3188.2841,
+ "eval_samples_per_second": 972.661,
+ "eval_steps_per_second": 1.9,
+ "step": 180000
+ },
+ {
+ "epoch": 0.9289499073887763,
+ "grad_norm": 24379.322265625,
+ "learning_rate": 1.0403305983896683e-05,
+ "loss": 0.3978,
+ "step": 180050
+ },
+ {
+ "epoch": 0.9292078773713891,
+ "grad_norm": 23249.939453125,
+ "learning_rate": 1.0381010995665752e-05,
+ "loss": 0.4055,
+ "step": 180100
+ },
+ {
+ "epoch": 0.9294658473540018,
+ "grad_norm": 25460.6875,
+ "learning_rate": 1.0358737155283942e-05,
+ "loss": 0.4059,
+ "step": 180150
+ },
+ {
+ "epoch": 0.9297238173366147,
+ "grad_norm": 23166.548828125,
+ "learning_rate": 1.0336484474640651e-05,
+ "loss": 0.4051,
+ "step": 180200
+ },
+ {
+ "epoch": 0.9299817873192275,
+ "grad_norm": 23631.94921875,
+ "learning_rate": 1.0314252965613908e-05,
+ "loss": 0.3974,
+ "step": 180250
+ },
+ {
+ "epoch": 0.9302397573018404,
+ "grad_norm": 26213.556640625,
+ "learning_rate": 1.0292042640070449e-05,
+ "loss": 0.3983,
+ "step": 180300
+ },
+ {
+ "epoch": 0.9304977272844532,
+ "grad_norm": 24056.875,
+ "learning_rate": 1.0269853509865751e-05,
+ "loss": 0.3979,
+ "step": 180350
+ },
+ {
+ "epoch": 0.930755697267066,
+ "grad_norm": 24793.658203125,
+ "learning_rate": 1.0247685586843897e-05,
+ "loss": 0.3993,
+ "step": 180400
+ },
+ {
+ "epoch": 0.9310136672496788,
+ "grad_norm": 25296.04296875,
+ "learning_rate": 1.0225538882837733e-05,
+ "loss": 0.4047,
+ "step": 180450
+ },
+ {
+ "epoch": 0.9312716372322917,
+ "grad_norm": 21486.990234375,
+ "learning_rate": 1.0203413409668722e-05,
+ "loss": 0.3995,
+ "step": 180500
+ },
+ {
+ "epoch": 0.9315296072149045,
+ "grad_norm": 24168.083984375,
+ "learning_rate": 1.018130917914702e-05,
+ "loss": 0.4081,
+ "step": 180550
+ },
+ {
+ "epoch": 0.9317875771975173,
+ "grad_norm": 25313.568359375,
+ "learning_rate": 1.0159226203071431e-05,
+ "loss": 0.4024,
+ "step": 180600
+ },
+ {
+ "epoch": 0.9320455471801301,
+ "grad_norm": 22535.845703125,
+ "learning_rate": 1.0137164493229411e-05,
+ "loss": 0.3974,
+ "step": 180650
+ },
+ {
+ "epoch": 0.932303517162743,
+ "grad_norm": 24480.0703125,
+ "learning_rate": 1.0115124061397102e-05,
+ "loss": 0.4031,
+ "step": 180700
+ },
+ {
+ "epoch": 0.9325614871453558,
+ "grad_norm": 29667.470703125,
+ "learning_rate": 1.0093104919339241e-05,
+ "loss": 0.3991,
+ "step": 180750
+ },
+ {
+ "epoch": 0.9328194571279685,
+ "grad_norm": 22311.767578125,
+ "learning_rate": 1.0071107078809228e-05,
+ "loss": 0.402,
+ "step": 180800
+ },
+ {
+ "epoch": 0.9330774271105814,
+ "grad_norm": 22752.642578125,
+ "learning_rate": 1.0049130551549068e-05,
+ "loss": 0.4022,
+ "step": 180850
+ },
+ {
+ "epoch": 0.9333353970931942,
+ "grad_norm": 26333.43359375,
+ "learning_rate": 1.0027175349289424e-05,
+ "loss": 0.4006,
+ "step": 180900
+ },
+ {
+ "epoch": 0.9335933670758071,
+ "grad_norm": 22951.927734375,
+ "learning_rate": 1.0005241483749533e-05,
+ "loss": 0.4022,
+ "step": 180950
+ },
+ {
+ "epoch": 0.9338513370584198,
+ "grad_norm": 24532.15625,
+ "learning_rate": 9.983328966637318e-06,
+ "loss": 0.398,
+ "step": 181000
+ },
+ {
+ "epoch": 0.9341093070410327,
+ "grad_norm": 24624.205078125,
+ "learning_rate": 9.961437809649188e-06,
+ "loss": 0.4021,
+ "step": 181050
+ },
+ {
+ "epoch": 0.9343672770236455,
+ "grad_norm": 23679.087890625,
+ "learning_rate": 9.93956802447027e-06,
+ "loss": 0.4038,
+ "step": 181100
+ },
+ {
+ "epoch": 0.9346252470062584,
+ "grad_norm": 22279.52734375,
+ "learning_rate": 9.917719622774219e-06,
+ "loss": 0.3987,
+ "step": 181150
+ },
+ {
+ "epoch": 0.9348832169888712,
+ "grad_norm": 25709.376953125,
+ "learning_rate": 9.895892616223268e-06,
+ "loss": 0.4062,
+ "step": 181200
+ },
+ {
+ "epoch": 0.935141186971484,
+ "grad_norm": 24607.25,
+ "learning_rate": 9.874087016468298e-06,
+ "loss": 0.3973,
+ "step": 181250
+ },
+ {
+ "epoch": 0.9353991569540968,
+ "grad_norm": 25458.861328125,
+ "learning_rate": 9.852302835148652e-06,
+ "loss": 0.3993,
+ "step": 181300
+ },
+ {
+ "epoch": 0.9356571269367097,
+ "grad_norm": 24070.654296875,
+ "learning_rate": 9.830540083892358e-06,
+ "loss": 0.4057,
+ "step": 181350
+ },
+ {
+ "epoch": 0.9359150969193225,
+ "grad_norm": 25323.736328125,
+ "learning_rate": 9.80879877431593e-06,
+ "loss": 0.407,
+ "step": 181400
+ },
+ {
+ "epoch": 0.9361730669019352,
+ "grad_norm": 27513.087890625,
+ "learning_rate": 9.787078918024455e-06,
+ "loss": 0.3979,
+ "step": 181450
+ },
+ {
+ "epoch": 0.9364310368845481,
+ "grad_norm": 22324.669921875,
+ "learning_rate": 9.765380526611568e-06,
+ "loss": 0.3984,
+ "step": 181500
+ },
+ {
+ "epoch": 0.936689006867161,
+ "grad_norm": 23778.37890625,
+ "learning_rate": 9.743703611659465e-06,
+ "loss": 0.4055,
+ "step": 181550
+ },
+ {
+ "epoch": 0.9369469768497738,
+ "grad_norm": 26777.255859375,
+ "learning_rate": 9.722048184738864e-06,
+ "loss": 0.4047,
+ "step": 181600
+ },
+ {
+ "epoch": 0.9372049468323865,
+ "grad_norm": 23210.876953125,
+ "learning_rate": 9.700414257409002e-06,
+ "loss": 0.393,
+ "step": 181650
+ },
+ {
+ "epoch": 0.9374629168149994,
+ "grad_norm": 22539.84765625,
+ "learning_rate": 9.67880184121765e-06,
+ "loss": 0.4069,
+ "step": 181700
+ },
+ {
+ "epoch": 0.9377208867976122,
+ "grad_norm": 25191.609375,
+ "learning_rate": 9.65721094770109e-06,
+ "loss": 0.4069,
+ "step": 181750
+ },
+ {
+ "epoch": 0.9379788567802251,
+ "grad_norm": 23813.578125,
+ "learning_rate": 9.63564158838416e-06,
+ "loss": 0.3954,
+ "step": 181800
+ },
+ {
+ "epoch": 0.9382368267628379,
+ "grad_norm": 23869.703125,
+ "learning_rate": 9.614093774780114e-06,
+ "loss": 0.3998,
+ "step": 181850
+ },
+ {
+ "epoch": 0.9384947967454507,
+ "grad_norm": 23316.384765625,
+ "learning_rate": 9.5925675183908e-06,
+ "loss": 0.3989,
+ "step": 181900
+ },
+ {
+ "epoch": 0.9387527667280635,
+ "grad_norm": 23641.65625,
+ "learning_rate": 9.571062830706496e-06,
+ "loss": 0.4017,
+ "step": 181950
+ },
+ {
+ "epoch": 0.9390107367106764,
+ "grad_norm": 23724.431640625,
+ "learning_rate": 9.549579723205982e-06,
+ "loss": 0.4042,
+ "step": 182000
+ },
+ {
+ "epoch": 0.9392687066932892,
+ "grad_norm": 24013.849609375,
+ "learning_rate": 9.528118207356556e-06,
+ "loss": 0.3966,
+ "step": 182050
+ },
+ {
+ "epoch": 0.939526676675902,
+ "grad_norm": 21843.55859375,
+ "learning_rate": 9.506678294613919e-06,
+ "loss": 0.4051,
+ "step": 182100
+ },
+ {
+ "epoch": 0.9397846466585148,
+ "grad_norm": 22000.7734375,
+ "learning_rate": 9.485259996422313e-06,
+ "loss": 0.4042,
+ "step": 182150
+ },
+ {
+ "epoch": 0.9400426166411276,
+ "grad_norm": 23307.556640625,
+ "learning_rate": 9.463863324214395e-06,
+ "loss": 0.4018,
+ "step": 182200
+ },
+ {
+ "epoch": 0.9403005866237405,
+ "grad_norm": 22961.353515625,
+ "learning_rate": 9.4424882894113e-06,
+ "loss": 0.3991,
+ "step": 182250
+ },
+ {
+ "epoch": 0.9405585566063532,
+ "grad_norm": 24167.134765625,
+ "learning_rate": 9.421134903422607e-06,
+ "loss": 0.4033,
+ "step": 182300
+ },
+ {
+ "epoch": 0.9408165265889661,
+ "grad_norm": 24116.75,
+ "learning_rate": 9.399803177646339e-06,
+ "loss": 0.3979,
+ "step": 182350
+ },
+ {
+ "epoch": 0.9410744965715789,
+ "grad_norm": 25658.6640625,
+ "learning_rate": 9.378493123468946e-06,
+ "loss": 0.4093,
+ "step": 182400
+ },
+ {
+ "epoch": 0.9413324665541918,
+ "grad_norm": 27761.8828125,
+ "learning_rate": 9.357204752265341e-06,
+ "loss": 0.3974,
+ "step": 182450
+ },
+ {
+ "epoch": 0.9415904365368046,
+ "grad_norm": 23456.90234375,
+ "learning_rate": 9.335938075398842e-06,
+ "loss": 0.4072,
+ "step": 182500
+ },
+ {
+ "epoch": 0.9418484065194174,
+ "grad_norm": 21258.984375,
+ "learning_rate": 9.314693104221184e-06,
+ "loss": 0.3952,
+ "step": 182550
+ },
+ {
+ "epoch": 0.9421063765020302,
+ "grad_norm": 22634.01953125,
+ "learning_rate": 9.293469850072522e-06,
+ "loss": 0.402,
+ "step": 182600
+ },
+ {
+ "epoch": 0.9423643464846431,
+ "grad_norm": 22349.267578125,
+ "learning_rate": 9.272268324281407e-06,
+ "loss": 0.3974,
+ "step": 182650
+ },
+ {
+ "epoch": 0.9426223164672559,
+ "grad_norm": 23658.505859375,
+ "learning_rate": 9.251088538164837e-06,
+ "loss": 0.3979,
+ "step": 182700
+ },
+ {
+ "epoch": 0.9428802864498687,
+ "grad_norm": 26879.39453125,
+ "learning_rate": 9.229930503028129e-06,
+ "loss": 0.3965,
+ "step": 182750
+ },
+ {
+ "epoch": 0.9431382564324815,
+ "grad_norm": 25313.255859375,
+ "learning_rate": 9.208794230165058e-06,
+ "loss": 0.4049,
+ "step": 182800
+ },
+ {
+ "epoch": 0.9433962264150944,
+ "grad_norm": 26135.587890625,
+ "learning_rate": 9.187679730857756e-06,
+ "loss": 0.408,
+ "step": 182850
+ },
+ {
+ "epoch": 0.9436541963977072,
+ "grad_norm": 24064.087890625,
+ "learning_rate": 9.166587016376715e-06,
+ "loss": 0.4025,
+ "step": 182900
+ },
+ {
+ "epoch": 0.9439121663803199,
+ "grad_norm": 24475.30859375,
+ "learning_rate": 9.145516097980856e-06,
+ "loss": 0.4019,
+ "step": 182950
+ },
+ {
+ "epoch": 0.9441701363629328,
+ "grad_norm": 23691.06640625,
+ "learning_rate": 9.12446698691738e-06,
+ "loss": 0.4031,
+ "step": 183000
+ },
+ {
+ "epoch": 0.9444281063455456,
+ "grad_norm": 25653.37109375,
+ "learning_rate": 9.103439694421928e-06,
+ "loss": 0.4007,
+ "step": 183050
+ },
+ {
+ "epoch": 0.9446860763281585,
+ "grad_norm": 22718.71875,
+ "learning_rate": 9.08243423171845e-06,
+ "loss": 0.3996,
+ "step": 183100
+ },
+ {
+ "epoch": 0.9449440463107712,
+ "grad_norm": 23337.986328125,
+ "learning_rate": 9.061450610019262e-06,
+ "loss": 0.4043,
+ "step": 183150
+ },
+ {
+ "epoch": 0.9452020162933841,
+ "grad_norm": 27628.021484375,
+ "learning_rate": 9.040488840525001e-06,
+ "loss": 0.409,
+ "step": 183200
+ },
+ {
+ "epoch": 0.9454599862759969,
+ "grad_norm": 22894.26953125,
+ "learning_rate": 9.01954893442467e-06,
+ "loss": 0.4026,
+ "step": 183250
+ },
+ {
+ "epoch": 0.9457179562586098,
+ "grad_norm": 27624.564453125,
+ "learning_rate": 8.998630902895566e-06,
+ "loss": 0.4011,
+ "step": 183300
+ },
+ {
+ "epoch": 0.9459759262412226,
+ "grad_norm": 25944.05859375,
+ "learning_rate": 8.977734757103351e-06,
+ "loss": 0.3995,
+ "step": 183350
+ },
+ {
+ "epoch": 0.9462338962238354,
+ "grad_norm": 27243.31640625,
+ "learning_rate": 8.95686050820197e-06,
+ "loss": 0.3983,
+ "step": 183400
+ },
+ {
+ "epoch": 0.9464918662064482,
+ "grad_norm": 24556.611328125,
+ "learning_rate": 8.936008167333699e-06,
+ "loss": 0.4041,
+ "step": 183450
+ },
+ {
+ "epoch": 0.9467498361890611,
+ "grad_norm": 22205.880859375,
+ "learning_rate": 8.915177745629112e-06,
+ "loss": 0.3973,
+ "step": 183500
+ },
+ {
+ "epoch": 0.9470078061716739,
+ "grad_norm": 26829.6328125,
+ "learning_rate": 8.894369254207069e-06,
+ "loss": 0.4023,
+ "step": 183550
+ },
+ {
+ "epoch": 0.9472657761542866,
+ "grad_norm": 24388.59765625,
+ "learning_rate": 8.873582704174776e-06,
+ "loss": 0.397,
+ "step": 183600
+ },
+ {
+ "epoch": 0.9475237461368995,
+ "grad_norm": 25665.98828125,
+ "learning_rate": 8.852818106627647e-06,
+ "loss": 0.4055,
+ "step": 183650
+ },
+ {
+ "epoch": 0.9477817161195123,
+ "grad_norm": 24880.47265625,
+ "learning_rate": 8.83207547264946e-06,
+ "loss": 0.4016,
+ "step": 183700
+ },
+ {
+ "epoch": 0.9480396861021252,
+ "grad_norm": 26516.6953125,
+ "learning_rate": 8.81135481331221e-06,
+ "loss": 0.3992,
+ "step": 183750
+ },
+ {
+ "epoch": 0.9482976560847379,
+ "grad_norm": 22604.123046875,
+ "learning_rate": 8.790656139676179e-06,
+ "loss": 0.401,
+ "step": 183800
+ },
+ {
+ "epoch": 0.9485556260673508,
+ "grad_norm": 24668.94921875,
+ "learning_rate": 8.769979462789957e-06,
+ "loss": 0.3974,
+ "step": 183850
+ },
+ {
+ "epoch": 0.9488135960499636,
+ "grad_norm": 26522.896484375,
+ "learning_rate": 8.749324793690295e-06,
+ "loss": 0.4048,
+ "step": 183900
+ },
+ {
+ "epoch": 0.9490715660325765,
+ "grad_norm": 26786.48046875,
+ "learning_rate": 8.728692143402295e-06,
+ "loss": 0.4075,
+ "step": 183950
+ },
+ {
+ "epoch": 0.9493295360151893,
+ "grad_norm": 23683.54296875,
+ "learning_rate": 8.708081522939265e-06,
+ "loss": 0.3996,
+ "step": 184000
+ },
+ {
+ "epoch": 0.9495875059978021,
+ "grad_norm": 23064.400390625,
+ "learning_rate": 8.687492943302739e-06,
+ "loss": 0.4036,
+ "step": 184050
+ },
+ {
+ "epoch": 0.9498454759804149,
+ "grad_norm": 24142.4921875,
+ "learning_rate": 8.666926415482501e-06,
+ "loss": 0.4023,
+ "step": 184100
+ },
+ {
+ "epoch": 0.9501034459630278,
+ "grad_norm": 24012.076171875,
+ "learning_rate": 8.6463819504566e-06,
+ "loss": 0.4024,
+ "step": 184150
+ },
+ {
+ "epoch": 0.9503614159456406,
+ "grad_norm": 22214.41015625,
+ "learning_rate": 8.625859559191224e-06,
+ "loss": 0.4002,
+ "step": 184200
+ },
+ {
+ "epoch": 0.9506193859282533,
+ "grad_norm": 24664.162109375,
+ "learning_rate": 8.60535925264086e-06,
+ "loss": 0.4027,
+ "step": 184250
+ },
+ {
+ "epoch": 0.9508773559108662,
+ "grad_norm": 21136.900390625,
+ "learning_rate": 8.584881041748171e-06,
+ "loss": 0.3957,
+ "step": 184300
+ },
+ {
+ "epoch": 0.951135325893479,
+ "grad_norm": 22411.33984375,
+ "learning_rate": 8.56442493744401e-06,
+ "loss": 0.3977,
+ "step": 184350
+ },
+ {
+ "epoch": 0.9513932958760919,
+ "grad_norm": 23004.173828125,
+ "learning_rate": 8.54399095064749e-06,
+ "loss": 0.4014,
+ "step": 184400
+ },
+ {
+ "epoch": 0.9516512658587046,
+ "grad_norm": 23692.26171875,
+ "learning_rate": 8.523579092265827e-06,
+ "loss": 0.4013,
+ "step": 184450
+ },
+ {
+ "epoch": 0.9519092358413175,
+ "grad_norm": 25310.919921875,
+ "learning_rate": 8.503189373194509e-06,
+ "loss": 0.3961,
+ "step": 184500
+ },
+ {
+ "epoch": 0.9521672058239303,
+ "grad_norm": 25963.943359375,
+ "learning_rate": 8.482821804317171e-06,
+ "loss": 0.4049,
+ "step": 184550
+ },
+ {
+ "epoch": 0.9524251758065432,
+ "grad_norm": 24282.115234375,
+ "learning_rate": 8.46247639650562e-06,
+ "loss": 0.4008,
+ "step": 184600
+ },
+ {
+ "epoch": 0.952683145789156,
+ "grad_norm": 24703.26953125,
+ "learning_rate": 8.442153160619837e-06,
+ "loss": 0.4063,
+ "step": 184650
+ },
+ {
+ "epoch": 0.9529411157717688,
+ "grad_norm": 23616.09375,
+ "learning_rate": 8.421852107507966e-06,
+ "loss": 0.3974,
+ "step": 184700
+ },
+ {
+ "epoch": 0.9531990857543816,
+ "grad_norm": 25447.408203125,
+ "learning_rate": 8.40157324800634e-06,
+ "loss": 0.4066,
+ "step": 184750
+ },
+ {
+ "epoch": 0.9534570557369945,
+ "grad_norm": 25534.3984375,
+ "learning_rate": 8.381316592939403e-06,
+ "loss": 0.4027,
+ "step": 184800
+ },
+ {
+ "epoch": 0.9537150257196073,
+ "grad_norm": 24251.138671875,
+ "learning_rate": 8.361082153119777e-06,
+ "loss": 0.3958,
+ "step": 184850
+ },
+ {
+ "epoch": 0.95397299570222,
+ "grad_norm": 26980.046875,
+ "learning_rate": 8.3408699393482e-06,
+ "loss": 0.4058,
+ "step": 184900
+ },
+ {
+ "epoch": 0.9542309656848329,
+ "grad_norm": 26143.732421875,
+ "learning_rate": 8.320679962413574e-06,
+ "loss": 0.4006,
+ "step": 184950
+ },
+ {
+ "epoch": 0.9544889356674457,
+ "grad_norm": 24566.15234375,
+ "learning_rate": 8.300512233092893e-06,
+ "loss": 0.405,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9544889356674457,
+ "eval_loss": 0.3880694806575775,
+ "eval_runtime": 3197.8794,
+ "eval_samples_per_second": 969.743,
+ "eval_steps_per_second": 1.894,
+ "step": 185000
+ },
+ {
+ "epoch": 0.9547469056500586,
+ "grad_norm": 22463.359375,
+ "learning_rate": 8.280366762151349e-06,
+ "loss": 0.4035,
+ "step": 185050
+ },
+ {
+ "epoch": 0.9550048756326713,
+ "grad_norm": 23964.845703125,
+ "learning_rate": 8.260243560342146e-06,
+ "loss": 0.399,
+ "step": 185100
+ },
+ {
+ "epoch": 0.9552628456152842,
+ "grad_norm": 22267.978515625,
+ "learning_rate": 8.2401426384067e-06,
+ "loss": 0.4065,
+ "step": 185150
+ },
+ {
+ "epoch": 0.955520815597897,
+ "grad_norm": 23959.732421875,
+ "learning_rate": 8.220064007074485e-06,
+ "loss": 0.3988,
+ "step": 185200
+ },
+ {
+ "epoch": 0.9557787855805099,
+ "grad_norm": 22042.95703125,
+ "learning_rate": 8.200007677063066e-06,
+ "loss": 0.4005,
+ "step": 185250
+ },
+ {
+ "epoch": 0.9560367555631226,
+ "grad_norm": 23760.798828125,
+ "learning_rate": 8.17997365907816e-06,
+ "loss": 0.4043,
+ "step": 185300
+ },
+ {
+ "epoch": 0.9562947255457355,
+ "grad_norm": 23235.8828125,
+ "learning_rate": 8.1599619638135e-06,
+ "loss": 0.3999,
+ "step": 185350
+ },
+ {
+ "epoch": 0.9565526955283483,
+ "grad_norm": 22637.701171875,
+ "learning_rate": 8.139972601950967e-06,
+ "loss": 0.4004,
+ "step": 185400
+ },
+ {
+ "epoch": 0.9568106655109612,
+ "grad_norm": 28806.810546875,
+ "learning_rate": 8.120005584160489e-06,
+ "loss": 0.4022,
+ "step": 185450
+ },
+ {
+ "epoch": 0.957068635493574,
+ "grad_norm": 22143.8203125,
+ "learning_rate": 8.100060921100067e-06,
+ "loss": 0.3977,
+ "step": 185500
+ },
+ {
+ "epoch": 0.9573266054761868,
+ "grad_norm": 22921.810546875,
+ "learning_rate": 8.080138623415783e-06,
+ "loss": 0.4,
+ "step": 185550
+ },
+ {
+ "epoch": 0.9575845754587996,
+ "grad_norm": 25425.640625,
+ "learning_rate": 8.060238701741762e-06,
+ "loss": 0.4021,
+ "step": 185600
+ },
+ {
+ "epoch": 0.9578425454414125,
+ "grad_norm": 27279.6796875,
+ "learning_rate": 8.040361166700216e-06,
+ "loss": 0.4064,
+ "step": 185650
+ },
+ {
+ "epoch": 0.9581005154240253,
+ "grad_norm": 25144.322265625,
+ "learning_rate": 8.020506028901376e-06,
+ "loss": 0.4031,
+ "step": 185700
+ },
+ {
+ "epoch": 0.958358485406638,
+ "grad_norm": 21046.607421875,
+ "learning_rate": 8.000673298943534e-06,
+ "loss": 0.4041,
+ "step": 185750
+ },
+ {
+ "epoch": 0.9586164553892509,
+ "grad_norm": 23166.087890625,
+ "learning_rate": 7.980862987413018e-06,
+ "loss": 0.3996,
+ "step": 185800
+ },
+ {
+ "epoch": 0.9588744253718637,
+ "grad_norm": 23506.693359375,
+ "learning_rate": 7.961075104884186e-06,
+ "loss": 0.3973,
+ "step": 185850
+ },
+ {
+ "epoch": 0.9591323953544766,
+ "grad_norm": 25975.408203125,
+ "learning_rate": 7.94130966191941e-06,
+ "loss": 0.4048,
+ "step": 185900
+ },
+ {
+ "epoch": 0.9593903653370893,
+ "grad_norm": 23704.638671875,
+ "learning_rate": 7.921566669069147e-06,
+ "loss": 0.4045,
+ "step": 185950
+ },
+ {
+ "epoch": 0.9596483353197022,
+ "grad_norm": 27402.2421875,
+ "learning_rate": 7.901846136871766e-06,
+ "loss": 0.4007,
+ "step": 186000
+ },
+ {
+ "epoch": 0.959906305302315,
+ "grad_norm": 23186.658203125,
+ "learning_rate": 7.882148075853752e-06,
+ "loss": 0.4072,
+ "step": 186050
+ },
+ {
+ "epoch": 0.9601642752849279,
+ "grad_norm": 24789.619140625,
+ "learning_rate": 7.862472496529528e-06,
+ "loss": 0.4056,
+ "step": 186100
+ },
+ {
+ "epoch": 0.9604222452675407,
+ "grad_norm": 23849.71875,
+ "learning_rate": 7.842819409401524e-06,
+ "loss": 0.4067,
+ "step": 186150
+ },
+ {
+ "epoch": 0.9606802152501535,
+ "grad_norm": 24820.765625,
+ "learning_rate": 7.823188824960221e-06,
+ "loss": 0.4071,
+ "step": 186200
+ },
+ {
+ "epoch": 0.9609381852327663,
+ "grad_norm": 23276.568359375,
+ "learning_rate": 7.803580753683992e-06,
+ "loss": 0.3989,
+ "step": 186250
+ },
+ {
+ "epoch": 0.9611961552153792,
+ "grad_norm": 21064.8984375,
+ "learning_rate": 7.783995206039279e-06,
+ "loss": 0.3994,
+ "step": 186300
+ },
+ {
+ "epoch": 0.961454125197992,
+ "grad_norm": 27310.30078125,
+ "learning_rate": 7.764432192480464e-06,
+ "loss": 0.4015,
+ "step": 186350
+ },
+ {
+ "epoch": 0.9617120951806047,
+ "grad_norm": 24786.1796875,
+ "learning_rate": 7.744891723449888e-06,
+ "loss": 0.4042,
+ "step": 186400
+ },
+ {
+ "epoch": 0.9619700651632176,
+ "grad_norm": 22362.47265625,
+ "learning_rate": 7.725373809377911e-06,
+ "loss": 0.3991,
+ "step": 186450
+ },
+ {
+ "epoch": 0.9622280351458304,
+ "grad_norm": 23751.4296875,
+ "learning_rate": 7.705878460682775e-06,
+ "loss": 0.3988,
+ "step": 186500
+ },
+ {
+ "epoch": 0.9624860051284433,
+ "grad_norm": 22956.935546875,
+ "learning_rate": 7.686405687770748e-06,
+ "loss": 0.4049,
+ "step": 186550
+ },
+ {
+ "epoch": 0.962743975111056,
+ "grad_norm": 25276.861328125,
+ "learning_rate": 7.666955501036006e-06,
+ "loss": 0.4005,
+ "step": 186600
+ },
+ {
+ "epoch": 0.9630019450936689,
+ "grad_norm": 22390.625,
+ "learning_rate": 7.647527910860691e-06,
+ "loss": 0.4008,
+ "step": 186650
+ },
+ {
+ "epoch": 0.9632599150762817,
+ "grad_norm": 28946.125,
+ "learning_rate": 7.628122927614856e-06,
+ "loss": 0.3987,
+ "step": 186700
+ },
+ {
+ "epoch": 0.9635178850588946,
+ "grad_norm": 23663.3125,
+ "learning_rate": 7.608740561656541e-06,
+ "loss": 0.4006,
+ "step": 186750
+ },
+ {
+ "epoch": 0.9637758550415074,
+ "grad_norm": 21705.16015625,
+ "learning_rate": 7.589380823331632e-06,
+ "loss": 0.4023,
+ "step": 186800
+ },
+ {
+ "epoch": 0.9640338250241202,
+ "grad_norm": 25353.228515625,
+ "learning_rate": 7.570043722974019e-06,
+ "loss": 0.4006,
+ "step": 186850
+ },
+ {
+ "epoch": 0.964291795006733,
+ "grad_norm": 26046.412109375,
+ "learning_rate": 7.55072927090546e-06,
+ "loss": 0.3931,
+ "step": 186900
+ },
+ {
+ "epoch": 0.9645497649893459,
+ "grad_norm": 25989.2578125,
+ "learning_rate": 7.531437477435621e-06,
+ "loss": 0.3989,
+ "step": 186950
+ },
+ {
+ "epoch": 0.9648077349719587,
+ "grad_norm": 22714.423828125,
+ "learning_rate": 7.51216835286212e-06,
+ "loss": 0.4018,
+ "step": 187000
+ },
+ {
+ "epoch": 0.9650657049545714,
+ "grad_norm": 26353.42578125,
+ "learning_rate": 7.492921907470407e-06,
+ "loss": 0.4056,
+ "step": 187050
+ },
+ {
+ "epoch": 0.9653236749371843,
+ "grad_norm": 23085.212890625,
+ "learning_rate": 7.4736981515338864e-06,
+ "loss": 0.3995,
+ "step": 187100
+ },
+ {
+ "epoch": 0.9655816449197971,
+ "grad_norm": 23125.970703125,
+ "learning_rate": 7.454497095313817e-06,
+ "loss": 0.4069,
+ "step": 187150
+ },
+ {
+ "epoch": 0.96583961490241,
+ "grad_norm": 23488.2265625,
+ "learning_rate": 7.435318749059356e-06,
+ "loss": 0.4039,
+ "step": 187200
+ },
+ {
+ "epoch": 0.9660975848850227,
+ "grad_norm": 22577.46875,
+ "learning_rate": 7.4161631230075305e-06,
+ "loss": 0.4051,
+ "step": 187250
+ },
+ {
+ "epoch": 0.9663555548676356,
+ "grad_norm": 22637.890625,
+ "learning_rate": 7.397030227383228e-06,
+ "loss": 0.3986,
+ "step": 187300
+ },
+ {
+ "epoch": 0.9666135248502484,
+ "grad_norm": 26084.412109375,
+ "learning_rate": 7.377920072399247e-06,
+ "loss": 0.398,
+ "step": 187350
+ },
+ {
+ "epoch": 0.9668714948328613,
+ "grad_norm": 25263.6328125,
+ "learning_rate": 7.3588326682562e-06,
+ "loss": 0.4035,
+ "step": 187400
+ },
+ {
+ "epoch": 0.9671294648154741,
+ "grad_norm": 22348.236328125,
+ "learning_rate": 7.339768025142573e-06,
+ "loss": 0.4003,
+ "step": 187450
+ },
+ {
+ "epoch": 0.9673874347980869,
+ "grad_norm": 23006.091796875,
+ "learning_rate": 7.320726153234714e-06,
+ "loss": 0.399,
+ "step": 187500
+ },
+ {
+ "epoch": 0.9676454047806997,
+ "grad_norm": 24137.44921875,
+ "learning_rate": 7.301707062696794e-06,
+ "loss": 0.3999,
+ "step": 187550
+ },
+ {
+ "epoch": 0.9679033747633126,
+ "grad_norm": 26101.837890625,
+ "learning_rate": 7.282710763680828e-06,
+ "loss": 0.4007,
+ "step": 187600
+ },
+ {
+ "epoch": 0.9681613447459254,
+ "grad_norm": 21417.814453125,
+ "learning_rate": 7.263737266326709e-06,
+ "loss": 0.3994,
+ "step": 187650
+ },
+ {
+ "epoch": 0.9684193147285381,
+ "grad_norm": 25831.45703125,
+ "learning_rate": 7.244786580762075e-06,
+ "loss": 0.3925,
+ "step": 187700
+ },
+ {
+ "epoch": 0.968677284711151,
+ "grad_norm": 24546.84765625,
+ "learning_rate": 7.225858717102474e-06,
+ "loss": 0.4004,
+ "step": 187750
+ },
+ {
+ "epoch": 0.9689352546937638,
+ "grad_norm": 23773.09765625,
+ "learning_rate": 7.206953685451212e-06,
+ "loss": 0.4041,
+ "step": 187800
+ },
+ {
+ "epoch": 0.9691932246763767,
+ "grad_norm": 23538.923828125,
+ "learning_rate": 7.188071495899423e-06,
+ "loss": 0.3971,
+ "step": 187850
+ },
+ {
+ "epoch": 0.9694511946589894,
+ "grad_norm": 24968.310546875,
+ "learning_rate": 7.169212158526084e-06,
+ "loss": 0.4047,
+ "step": 187900
+ },
+ {
+ "epoch": 0.9697091646416023,
+ "grad_norm": 24379.23828125,
+ "learning_rate": 7.150375683397908e-06,
+ "loss": 0.3983,
+ "step": 187950
+ },
+ {
+ "epoch": 0.9699671346242151,
+ "grad_norm": 25501.638671875,
+ "learning_rate": 7.131562080569465e-06,
+ "loss": 0.4024,
+ "step": 188000
+ },
+ {
+ "epoch": 0.970225104606828,
+ "grad_norm": 24917.73046875,
+ "learning_rate": 7.112771360083087e-06,
+ "loss": 0.3998,
+ "step": 188050
+ },
+ {
+ "epoch": 0.9704830745894407,
+ "grad_norm": 24725.638671875,
+ "learning_rate": 7.094003531968896e-06,
+ "loss": 0.3964,
+ "step": 188100
+ },
+ {
+ "epoch": 0.9707410445720536,
+ "grad_norm": 23913.5703125,
+ "learning_rate": 7.075258606244789e-06,
+ "loss": 0.3987,
+ "step": 188150
+ },
+ {
+ "epoch": 0.9709990145546664,
+ "grad_norm": 25010.09375,
+ "learning_rate": 7.05653659291644e-06,
+ "loss": 0.4021,
+ "step": 188200
+ },
+ {
+ "epoch": 0.9712569845372793,
+ "grad_norm": 25357.556640625,
+ "learning_rate": 7.037837501977318e-06,
+ "loss": 0.4007,
+ "step": 188250
+ },
+ {
+ "epoch": 0.9715149545198921,
+ "grad_norm": 24599.890625,
+ "learning_rate": 7.019161343408625e-06,
+ "loss": 0.3962,
+ "step": 188300
+ },
+ {
+ "epoch": 0.9717729245025049,
+ "grad_norm": 25866.2734375,
+ "learning_rate": 7.000508127179328e-06,
+ "loss": 0.3983,
+ "step": 188350
+ },
+ {
+ "epoch": 0.9720308944851177,
+ "grad_norm": 22591.40625,
+ "learning_rate": 6.981877863246161e-06,
+ "loss": 0.3971,
+ "step": 188400
+ },
+ {
+ "epoch": 0.9722888644677306,
+ "grad_norm": 20752.091796875,
+ "learning_rate": 6.963270561553586e-06,
+ "loss": 0.3946,
+ "step": 188450
+ },
+ {
+ "epoch": 0.9725468344503434,
+ "grad_norm": 22927.109375,
+ "learning_rate": 6.94468623203382e-06,
+ "loss": 0.4036,
+ "step": 188500
+ },
+ {
+ "epoch": 0.9728048044329561,
+ "grad_norm": 27096.041015625,
+ "learning_rate": 6.92612488460685e-06,
+ "loss": 0.3982,
+ "step": 188550
+ },
+ {
+ "epoch": 0.973062774415569,
+ "grad_norm": 24426.93359375,
+ "learning_rate": 6.907586529180321e-06,
+ "loss": 0.4054,
+ "step": 188600
+ },
+ {
+ "epoch": 0.9733207443981818,
+ "grad_norm": 25097.658203125,
+ "learning_rate": 6.889071175649669e-06,
+ "loss": 0.4015,
+ "step": 188650
+ },
+ {
+ "epoch": 0.9735787143807947,
+ "grad_norm": 24646.548828125,
+ "learning_rate": 6.870578833898033e-06,
+ "loss": 0.3977,
+ "step": 188700
+ },
+ {
+ "epoch": 0.9738366843634074,
+ "grad_norm": 23465.357421875,
+ "learning_rate": 6.852109513796257e-06,
+ "loss": 0.396,
+ "step": 188750
+ },
+ {
+ "epoch": 0.9740946543460203,
+ "grad_norm": 22382.603515625,
+ "learning_rate": 6.83366322520293e-06,
+ "loss": 0.4018,
+ "step": 188800
+ },
+ {
+ "epoch": 0.9743526243286331,
+ "grad_norm": 24666.61328125,
+ "learning_rate": 6.815239977964283e-06,
+ "loss": 0.4046,
+ "step": 188850
+ },
+ {
+ "epoch": 0.974610594311246,
+ "grad_norm": 25308.685546875,
+ "learning_rate": 6.796839781914321e-06,
+ "loss": 0.3998,
+ "step": 188900
+ },
+ {
+ "epoch": 0.9748685642938588,
+ "grad_norm": 24856.64453125,
+ "learning_rate": 6.778462646874706e-06,
+ "loss": 0.4014,
+ "step": 188950
+ },
+ {
+ "epoch": 0.9751265342764716,
+ "grad_norm": 27452.50390625,
+ "learning_rate": 6.760108582654795e-06,
+ "loss": 0.4008,
+ "step": 189000
+ },
+ {
+ "epoch": 0.9753845042590844,
+ "grad_norm": 25027.416015625,
+ "learning_rate": 6.741777599051629e-06,
+ "loss": 0.4006,
+ "step": 189050
+ },
+ {
+ "epoch": 0.9756424742416973,
+ "grad_norm": 24687.740234375,
+ "learning_rate": 6.723469705849927e-06,
+ "loss": 0.4056,
+ "step": 189100
+ },
+ {
+ "epoch": 0.9759004442243101,
+ "grad_norm": 24812.55078125,
+ "learning_rate": 6.705184912822105e-06,
+ "loss": 0.4043,
+ "step": 189150
+ },
+ {
+ "epoch": 0.9761584142069228,
+ "grad_norm": 25776.005859375,
+ "learning_rate": 6.686923229728214e-06,
+ "loss": 0.4052,
+ "step": 189200
+ },
+ {
+ "epoch": 0.9764163841895357,
+ "grad_norm": 24319.34765625,
+ "learning_rate": 6.668684666316005e-06,
+ "loss": 0.4014,
+ "step": 189250
+ },
+ {
+ "epoch": 0.9766743541721485,
+ "grad_norm": 28024.419921875,
+ "learning_rate": 6.650469232320839e-06,
+ "loss": 0.3991,
+ "step": 189300
+ },
+ {
+ "epoch": 0.9769323241547614,
+ "grad_norm": 25074.068359375,
+ "learning_rate": 6.6322769374658085e-06,
+ "loss": 0.4034,
+ "step": 189350
+ },
+ {
+ "epoch": 0.9771902941373741,
+ "grad_norm": 21126.572265625,
+ "learning_rate": 6.61410779146156e-06,
+ "loss": 0.3998,
+ "step": 189400
+ },
+ {
+ "epoch": 0.977448264119987,
+ "grad_norm": 25041.337890625,
+ "learning_rate": 6.595961804006467e-06,
+ "loss": 0.4012,
+ "step": 189450
+ },
+ {
+ "epoch": 0.9777062341025998,
+ "grad_norm": 25474.263671875,
+ "learning_rate": 6.577838984786489e-06,
+ "loss": 0.3991,
+ "step": 189500
+ },
+ {
+ "epoch": 0.9779642040852127,
+ "grad_norm": 22192.98828125,
+ "learning_rate": 6.55973934347523e-06,
+ "loss": 0.3965,
+ "step": 189550
+ },
+ {
+ "epoch": 0.9782221740678255,
+ "grad_norm": 24587.9453125,
+ "learning_rate": 6.5416628897339625e-06,
+ "loss": 0.4008,
+ "step": 189600
+ },
+ {
+ "epoch": 0.9784801440504383,
+ "grad_norm": 23246.314453125,
+ "learning_rate": 6.523609633211497e-06,
+ "loss": 0.4036,
+ "step": 189650
+ },
+ {
+ "epoch": 0.9787381140330511,
+ "grad_norm": 24233.033203125,
+ "learning_rate": 6.505579583544353e-06,
+ "loss": 0.4002,
+ "step": 189700
+ },
+ {
+ "epoch": 0.978996084015664,
+ "grad_norm": 24149.6953125,
+ "learning_rate": 6.487572750356602e-06,
+ "loss": 0.4043,
+ "step": 189750
+ },
+ {
+ "epoch": 0.9792540539982768,
+ "grad_norm": 25376.3046875,
+ "learning_rate": 6.469589143259952e-06,
+ "loss": 0.3997,
+ "step": 189800
+ },
+ {
+ "epoch": 0.9795120239808895,
+ "grad_norm": 25878.90625,
+ "learning_rate": 6.451628771853696e-06,
+ "loss": 0.3936,
+ "step": 189850
+ },
+ {
+ "epoch": 0.9797699939635024,
+ "grad_norm": 24123.169921875,
+ "learning_rate": 6.433691645724743e-06,
+ "loss": 0.3976,
+ "step": 189900
+ },
+ {
+ "epoch": 0.9800279639461152,
+ "grad_norm": 23894.5625,
+ "learning_rate": 6.4157777744475626e-06,
+ "loss": 0.4025,
+ "step": 189950
+ },
+ {
+ "epoch": 0.9802859339287281,
+ "grad_norm": 27271.9609375,
+ "learning_rate": 6.3978871675842544e-06,
+ "loss": 0.4007,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9802859339287281,
+ "eval_loss": 0.3872862458229065,
+ "eval_runtime": 3184.1416,
+ "eval_samples_per_second": 973.927,
+ "eval_steps_per_second": 1.902,
+ "step": 190000
+ },
+ {
+ "epoch": 0.9805439039113408,
+ "grad_norm": 25592.9296875,
+ "learning_rate": 6.380019834684475e-06,
+ "loss": 0.4041,
+ "step": 190050
+ },
+ {
+ "epoch": 0.9808018738939537,
+ "grad_norm": 22425.51953125,
+ "learning_rate": 6.362175785285457e-06,
+ "loss": 0.4028,
+ "step": 190100
+ },
+ {
+ "epoch": 0.9810598438765665,
+ "grad_norm": 25178.28125,
+ "learning_rate": 6.344355028912008e-06,
+ "loss": 0.3972,
+ "step": 190150
+ },
+ {
+ "epoch": 0.9813178138591794,
+ "grad_norm": 25157.537109375,
+ "learning_rate": 6.326557575076486e-06,
+ "loss": 0.3989,
+ "step": 190200
+ },
+ {
+ "epoch": 0.9815757838417921,
+ "grad_norm": 23774.67578125,
+ "learning_rate": 6.3087834332788695e-06,
+ "loss": 0.4057,
+ "step": 190250
+ },
+ {
+ "epoch": 0.981833753824405,
+ "grad_norm": 25307.736328125,
+ "learning_rate": 6.2910326130066035e-06,
+ "loss": 0.3946,
+ "step": 190300
+ },
+ {
+ "epoch": 0.9820917238070178,
+ "grad_norm": 28657.8125,
+ "learning_rate": 6.273305123734769e-06,
+ "loss": 0.4006,
+ "step": 190350
+ },
+ {
+ "epoch": 0.9823496937896307,
+ "grad_norm": 24404.603515625,
+ "learning_rate": 6.255600974925935e-06,
+ "loss": 0.3998,
+ "step": 190400
+ },
+ {
+ "epoch": 0.9826076637722435,
+ "grad_norm": 22460.1640625,
+ "learning_rate": 6.237920176030232e-06,
+ "loss": 0.4039,
+ "step": 190450
+ },
+ {
+ "epoch": 0.9828656337548562,
+ "grad_norm": 27335.625,
+ "learning_rate": 6.220262736485355e-06,
+ "loss": 0.3937,
+ "step": 190500
+ },
+ {
+ "epoch": 0.9831236037374691,
+ "grad_norm": 27996.9765625,
+ "learning_rate": 6.202628665716464e-06,
+ "loss": 0.4025,
+ "step": 190550
+ },
+ {
+ "epoch": 0.983381573720082,
+ "grad_norm": 23532.66796875,
+ "learning_rate": 6.18501797313632e-06,
+ "loss": 0.4007,
+ "step": 190600
+ },
+ {
+ "epoch": 0.9836395437026948,
+ "grad_norm": 27360.333984375,
+ "learning_rate": 6.167430668145146e-06,
+ "loss": 0.3994,
+ "step": 190650
+ },
+ {
+ "epoch": 0.9838975136853075,
+ "grad_norm": 23754.23828125,
+ "learning_rate": 6.149866760130718e-06,
+ "loss": 0.4043,
+ "step": 190700
+ },
+ {
+ "epoch": 0.9841554836679204,
+ "grad_norm": 24313.943359375,
+ "learning_rate": 6.1323262584683075e-06,
+ "loss": 0.4039,
+ "step": 190750
+ },
+ {
+ "epoch": 0.9844134536505332,
+ "grad_norm": 22932.11328125,
+ "learning_rate": 6.114809172520686e-06,
+ "loss": 0.3977,
+ "step": 190800
+ },
+ {
+ "epoch": 0.9846714236331461,
+ "grad_norm": 27614.103515625,
+ "learning_rate": 6.097315511638135e-06,
+ "loss": 0.405,
+ "step": 190850
+ },
+ {
+ "epoch": 0.9849293936157588,
+ "grad_norm": 21648.470703125,
+ "learning_rate": 6.079845285158447e-06,
+ "loss": 0.403,
+ "step": 190900
+ },
+ {
+ "epoch": 0.9851873635983717,
+ "grad_norm": 25720.76953125,
+ "learning_rate": 6.0623985024068854e-06,
+ "loss": 0.4069,
+ "step": 190950
+ },
+ {
+ "epoch": 0.9854453335809845,
+ "grad_norm": 22051.30078125,
+ "learning_rate": 6.044975172696199e-06,
+ "loss": 0.4062,
+ "step": 191000
+ },
+ {
+ "epoch": 0.9857033035635974,
+ "grad_norm": 27862.138671875,
+ "learning_rate": 6.027575305326621e-06,
+ "loss": 0.4029,
+ "step": 191050
+ },
+ {
+ "epoch": 0.9859612735462102,
+ "grad_norm": 24624.951171875,
+ "learning_rate": 6.010198909585862e-06,
+ "loss": 0.3995,
+ "step": 191100
+ },
+ {
+ "epoch": 0.986219243528823,
+ "grad_norm": 23278.45703125,
+ "learning_rate": 5.992845994749136e-06,
+ "loss": 0.3981,
+ "step": 191150
+ },
+ {
+ "epoch": 0.9864772135114358,
+ "grad_norm": 27549.26953125,
+ "learning_rate": 5.975516570079048e-06,
+ "loss": 0.3999,
+ "step": 191200
+ },
+ {
+ "epoch": 0.9867351834940487,
+ "grad_norm": 24570.40625,
+ "learning_rate": 5.95821064482574e-06,
+ "loss": 0.4052,
+ "step": 191250
+ },
+ {
+ "epoch": 0.9869931534766615,
+ "grad_norm": 23672.029296875,
+ "learning_rate": 5.9409282282267665e-06,
+ "loss": 0.4045,
+ "step": 191300
+ },
+ {
+ "epoch": 0.9872511234592742,
+ "grad_norm": 22627.697265625,
+ "learning_rate": 5.923669329507148e-06,
+ "loss": 0.4017,
+ "step": 191350
+ },
+ {
+ "epoch": 0.9875090934418871,
+ "grad_norm": 22583.0390625,
+ "learning_rate": 5.906433957879365e-06,
+ "loss": 0.399,
+ "step": 191400
+ },
+ {
+ "epoch": 0.9877670634244999,
+ "grad_norm": 22665.984375,
+ "learning_rate": 5.889222122543298e-06,
+ "loss": 0.3989,
+ "step": 191450
+ },
+ {
+ "epoch": 0.9880250334071128,
+ "grad_norm": 25125.6640625,
+ "learning_rate": 5.872033832686319e-06,
+ "loss": 0.4001,
+ "step": 191500
+ },
+ {
+ "epoch": 0.9882830033897255,
+ "grad_norm": 24863.34375,
+ "learning_rate": 5.8548690974831845e-06,
+ "loss": 0.3991,
+ "step": 191550
+ },
+ {
+ "epoch": 0.9885409733723384,
+ "grad_norm": 23538.44921875,
+ "learning_rate": 5.837727926096109e-06,
+ "loss": 0.3979,
+ "step": 191600
+ },
+ {
+ "epoch": 0.9887989433549512,
+ "grad_norm": 23396.3203125,
+ "learning_rate": 5.820610327674708e-06,
+ "loss": 0.4049,
+ "step": 191650
+ },
+ {
+ "epoch": 0.9890569133375641,
+ "grad_norm": 22553.01171875,
+ "learning_rate": 5.803516311356044e-06,
+ "loss": 0.3983,
+ "step": 191700
+ },
+ {
+ "epoch": 0.9893148833201769,
+ "grad_norm": 25163.04296875,
+ "learning_rate": 5.786445886264541e-06,
+ "loss": 0.3969,
+ "step": 191750
+ },
+ {
+ "epoch": 0.9895728533027897,
+ "grad_norm": 22826.181640625,
+ "learning_rate": 5.769399061512093e-06,
+ "loss": 0.4016,
+ "step": 191800
+ },
+ {
+ "epoch": 0.9898308232854025,
+ "grad_norm": 22302.7265625,
+ "learning_rate": 5.752375846197944e-06,
+ "loss": 0.3988,
+ "step": 191850
+ },
+ {
+ "epoch": 0.9900887932680154,
+ "grad_norm": 20985.990234375,
+ "learning_rate": 5.735376249408753e-06,
+ "loss": 0.3952,
+ "step": 191900
+ },
+ {
+ "epoch": 0.9903467632506282,
+ "grad_norm": 23513.19921875,
+ "learning_rate": 5.718400280218611e-06,
+ "loss": 0.4052,
+ "step": 191950
+ },
+ {
+ "epoch": 0.9906047332332409,
+ "grad_norm": 23184.818359375,
+ "learning_rate": 5.7014479476889145e-06,
+ "loss": 0.399,
+ "step": 192000
+ },
+ {
+ "epoch": 0.9908627032158538,
+ "grad_norm": 23472.9453125,
+ "learning_rate": 5.684519260868521e-06,
+ "loss": 0.3946,
+ "step": 192050
+ },
+ {
+ "epoch": 0.9911206731984666,
+ "grad_norm": 26255.388671875,
+ "learning_rate": 5.667614228793622e-06,
+ "loss": 0.3964,
+ "step": 192100
+ },
+ {
+ "epoch": 0.9913786431810795,
+ "grad_norm": 23894.54296875,
+ "learning_rate": 5.650732860487806e-06,
+ "loss": 0.3928,
+ "step": 192150
+ },
+ {
+ "epoch": 0.9916366131636922,
+ "grad_norm": 24135.478515625,
+ "learning_rate": 5.633875164962016e-06,
+ "loss": 0.4019,
+ "step": 192200
+ },
+ {
+ "epoch": 0.9918945831463051,
+ "grad_norm": 26928.08984375,
+ "learning_rate": 5.617041151214553e-06,
+ "loss": 0.3958,
+ "step": 192250
+ },
+ {
+ "epoch": 0.9921525531289179,
+ "grad_norm": 22469.884765625,
+ "learning_rate": 5.600230828231107e-06,
+ "loss": 0.4031,
+ "step": 192300
+ },
+ {
+ "epoch": 0.9924105231115308,
+ "grad_norm": 23694.59765625,
+ "learning_rate": 5.583444204984695e-06,
+ "loss": 0.3926,
+ "step": 192350
+ },
+ {
+ "epoch": 0.9926684930941435,
+ "grad_norm": 23482.986328125,
+ "learning_rate": 5.566681290435688e-06,
+ "loss": 0.4112,
+ "step": 192400
+ },
+ {
+ "epoch": 0.9929264630767564,
+ "grad_norm": 22524.994140625,
+ "learning_rate": 5.549942093531812e-06,
+ "loss": 0.3981,
+ "step": 192450
+ },
+ {
+ "epoch": 0.9931844330593692,
+ "grad_norm": 27258.35546875,
+ "learning_rate": 5.5332266232081155e-06,
+ "loss": 0.4024,
+ "step": 192500
+ },
+ {
+ "epoch": 0.9934424030419821,
+ "grad_norm": 19928.40625,
+ "learning_rate": 5.516534888386992e-06,
+ "loss": 0.4028,
+ "step": 192550
+ },
+ {
+ "epoch": 0.9937003730245949,
+ "grad_norm": 21809.205078125,
+ "learning_rate": 5.499866897978189e-06,
+ "loss": 0.3996,
+ "step": 192600
+ },
+ {
+ "epoch": 0.9939583430072076,
+ "grad_norm": 22132.6171875,
+ "learning_rate": 5.483222660878729e-06,
+ "loss": 0.4012,
+ "step": 192650
+ },
+ {
+ "epoch": 0.9942163129898205,
+ "grad_norm": 25306.728515625,
+ "learning_rate": 5.466602185973002e-06,
+ "loss": 0.3987,
+ "step": 192700
+ },
+ {
+ "epoch": 0.9944742829724333,
+ "grad_norm": 29266.78515625,
+ "learning_rate": 5.4500054821326865e-06,
+ "loss": 0.4028,
+ "step": 192750
+ },
+ {
+ "epoch": 0.9947322529550462,
+ "grad_norm": 23506.931640625,
+ "learning_rate": 5.433432558216778e-06,
+ "loss": 0.3948,
+ "step": 192800
+ },
+ {
+ "epoch": 0.9949902229376589,
+ "grad_norm": 22564.177734375,
+ "learning_rate": 5.416883423071606e-06,
+ "loss": 0.4015,
+ "step": 192850
+ },
+ {
+ "epoch": 0.9952481929202718,
+ "grad_norm": 24564.380859375,
+ "learning_rate": 5.400358085530738e-06,
+ "loss": 0.4046,
+ "step": 192900
+ },
+ {
+ "epoch": 0.9955061629028846,
+ "grad_norm": 24793.91796875,
+ "learning_rate": 5.383856554415117e-06,
+ "loss": 0.4003,
+ "step": 192950
+ },
+ {
+ "epoch": 0.9957641328854975,
+ "grad_norm": 23798.228515625,
+ "learning_rate": 5.367378838532927e-06,
+ "loss": 0.3982,
+ "step": 193000
+ },
+ {
+ "epoch": 0.9960221028681102,
+ "grad_norm": 23164.642578125,
+ "learning_rate": 5.350924946679653e-06,
+ "loss": 0.3977,
+ "step": 193050
+ },
+ {
+ "epoch": 0.9962800728507231,
+ "grad_norm": 25646.29296875,
+ "learning_rate": 5.334494887638058e-06,
+ "loss": 0.3992,
+ "step": 193100
+ },
+ {
+ "epoch": 0.9965380428333359,
+ "grad_norm": 24146.2421875,
+ "learning_rate": 5.318088670178189e-06,
+ "loss": 0.4037,
+ "step": 193150
+ },
+ {
+ "epoch": 0.9967960128159488,
+ "grad_norm": 22594.72265625,
+ "learning_rate": 5.301706303057386e-06,
+ "loss": 0.4004,
+ "step": 193200
+ },
+ {
+ "epoch": 0.9970539827985616,
+ "grad_norm": 23395.515625,
+ "learning_rate": 5.285347795020224e-06,
+ "loss": 0.3958,
+ "step": 193250
+ },
+ {
+ "epoch": 0.9973119527811743,
+ "grad_norm": 23383.431640625,
+ "learning_rate": 5.269013154798558e-06,
+ "loss": 0.3998,
+ "step": 193300
+ },
+ {
+ "epoch": 0.9975699227637872,
+ "grad_norm": 20586.341796875,
+ "learning_rate": 5.252702391111508e-06,
+ "loss": 0.3979,
+ "step": 193350
+ },
+ {
+ "epoch": 0.9978278927464,
+ "grad_norm": 26526.83203125,
+ "learning_rate": 5.236415512665438e-06,
+ "loss": 0.4036,
+ "step": 193400
+ },
+ {
+ "epoch": 0.9980858627290129,
+ "grad_norm": 25045.224609375,
+ "learning_rate": 5.220152528153965e-06,
+ "loss": 0.4028,
+ "step": 193450
+ },
+ {
+ "epoch": 0.9983438327116256,
+ "grad_norm": 23480.755859375,
+ "learning_rate": 5.20391344625798e-06,
+ "loss": 0.4053,
+ "step": 193500
+ },
+ {
+ "epoch": 0.9986018026942385,
+ "grad_norm": 25235.927734375,
+ "learning_rate": 5.187698275645553e-06,
+ "loss": 0.3964,
+ "step": 193550
+ },
+ {
+ "epoch": 0.9988597726768513,
+ "grad_norm": 24883.29296875,
+ "learning_rate": 5.1715070249720555e-06,
+ "loss": 0.3978,
+ "step": 193600
+ },
+ {
+ "epoch": 0.9991177426594642,
+ "grad_norm": 25161.71484375,
+ "learning_rate": 5.155339702880052e-06,
+ "loss": 0.3998,
+ "step": 193650
+ },
+ {
+ "epoch": 0.9993757126420769,
+ "grad_norm": 21524.724609375,
+ "learning_rate": 5.13919631799934e-06,
+ "loss": 0.3955,
+ "step": 193700
+ },
+ {
+ "epoch": 0.9996336826246898,
+ "grad_norm": 23394.1015625,
+ "learning_rate": 5.123076878946981e-06,
+ "loss": 0.3962,
+ "step": 193750
+ },
+ {
+ "epoch": 0.9998916526073026,
+ "grad_norm": 24562.419921875,
+ "learning_rate": 5.106981394327165e-06,
+ "loss": 0.4,
+ "step": 193800
+ },
+ {
+ "epoch": 1.0001496225899154,
+ "grad_norm": 23818.201171875,
+ "learning_rate": 5.090909872731392e-06,
+ "loss": 0.4065,
+ "step": 193850
+ },
+ {
+ "epoch": 1.0004075925725282,
+ "grad_norm": 25973.83984375,
+ "learning_rate": 5.074862322738316e-06,
+ "loss": 0.4015,
+ "step": 193900
+ },
+ {
+ "epoch": 1.000665562555141,
+ "grad_norm": 26476.041015625,
+ "learning_rate": 5.0588387529138085e-06,
+ "loss": 0.401,
+ "step": 193950
+ },
+ {
+ "epoch": 1.000923532537754,
+ "grad_norm": 22776.267578125,
+ "learning_rate": 5.042839171810937e-06,
+ "loss": 0.4021,
+ "step": 194000
+ },
+ {
+ "epoch": 1.0011815025203668,
+ "grad_norm": 22484.884765625,
+ "learning_rate": 5.026863587969966e-06,
+ "loss": 0.4013,
+ "step": 194050
+ },
+ {
+ "epoch": 1.0014394725029796,
+ "grad_norm": 21445.009765625,
+ "learning_rate": 5.010912009918361e-06,
+ "loss": 0.4001,
+ "step": 194100
+ },
+ {
+ "epoch": 1.0016974424855924,
+ "grad_norm": 23748.365234375,
+ "learning_rate": 4.994984446170764e-06,
+ "loss": 0.3985,
+ "step": 194150
+ },
+ {
+ "epoch": 1.0019554124682053,
+ "grad_norm": 25007.73828125,
+ "learning_rate": 4.9790809052289996e-06,
+ "loss": 0.403,
+ "step": 194200
+ },
+ {
+ "epoch": 1.002213382450818,
+ "grad_norm": 26824.900390625,
+ "learning_rate": 4.963201395582062e-06,
+ "loss": 0.3966,
+ "step": 194250
+ },
+ {
+ "epoch": 1.0024713524334308,
+ "grad_norm": 21838.662109375,
+ "learning_rate": 4.947345925706148e-06,
+ "loss": 0.3955,
+ "step": 194300
+ },
+ {
+ "epoch": 1.0027293224160436,
+ "grad_norm": 20830.59375,
+ "learning_rate": 4.931514504064566e-06,
+ "loss": 0.3976,
+ "step": 194350
+ },
+ {
+ "epoch": 1.0029872923986565,
+ "grad_norm": 24187.484375,
+ "learning_rate": 4.915707139107856e-06,
+ "loss": 0.4009,
+ "step": 194400
+ },
+ {
+ "epoch": 1.0032452623812693,
+ "grad_norm": 23026.99609375,
+ "learning_rate": 4.899923839273662e-06,
+ "loss": 0.4017,
+ "step": 194450
+ },
+ {
+ "epoch": 1.0035032323638822,
+ "grad_norm": 25855.919921875,
+ "learning_rate": 4.884164612986808e-06,
+ "loss": 0.3966,
+ "step": 194500
+ },
+ {
+ "epoch": 1.003761202346495,
+ "grad_norm": 23424.58984375,
+ "learning_rate": 4.86842946865928e-06,
+ "loss": 0.4007,
+ "step": 194550
+ },
+ {
+ "epoch": 1.0040191723291079,
+ "grad_norm": 20644.318359375,
+ "learning_rate": 4.852718414690166e-06,
+ "loss": 0.405,
+ "step": 194600
+ },
+ {
+ "epoch": 1.0042771423117207,
+ "grad_norm": 24923.30078125,
+ "learning_rate": 4.8370314594657405e-06,
+ "loss": 0.3961,
+ "step": 194650
+ },
+ {
+ "epoch": 1.0045351122943333,
+ "grad_norm": 23334.19921875,
+ "learning_rate": 4.821368611359395e-06,
+ "loss": 0.3981,
+ "step": 194700
+ },
+ {
+ "epoch": 1.0047930822769462,
+ "grad_norm": 24258.54296875,
+ "learning_rate": 4.8057298787316516e-06,
+ "loss": 0.3998,
+ "step": 194750
+ },
+ {
+ "epoch": 1.005051052259559,
+ "grad_norm": 23366.234375,
+ "learning_rate": 4.790115269930162e-06,
+ "loss": 0.3998,
+ "step": 194800
+ },
+ {
+ "epoch": 1.005309022242172,
+ "grad_norm": 22389.498046875,
+ "learning_rate": 4.774524793289692e-06,
+ "loss": 0.4025,
+ "step": 194850
+ },
+ {
+ "epoch": 1.0055669922247847,
+ "grad_norm": 25497.361328125,
+ "learning_rate": 4.758958457132157e-06,
+ "loss": 0.3979,
+ "step": 194900
+ },
+ {
+ "epoch": 1.0058249622073976,
+ "grad_norm": 24179.626953125,
+ "learning_rate": 4.7434162697665595e-06,
+ "loss": 0.3984,
+ "step": 194950
+ },
+ {
+ "epoch": 1.0060829321900104,
+ "grad_norm": 24002.955078125,
+ "learning_rate": 4.727898239489015e-06,
+ "loss": 0.398,
+ "step": 195000
+ },
+ {
+ "epoch": 1.0060829321900104,
+ "eval_loss": 0.3868441879749298,
+ "eval_runtime": 3205.6792,
+ "eval_samples_per_second": 967.383,
+ "eval_steps_per_second": 1.889,
+ "step": 195000
+ },
+ {
+ "epoch": 1.0063409021726233,
+ "grad_norm": 26567.27734375,
+ "learning_rate": 4.712404374582741e-06,
+ "loss": 0.399,
+ "step": 195050
+ },
+ {
+ "epoch": 1.006598872155236,
+ "grad_norm": 25244.615234375,
+ "learning_rate": 4.696934683318077e-06,
+ "loss": 0.3998,
+ "step": 195100
+ },
+ {
+ "epoch": 1.0068568421378488,
+ "grad_norm": 23278.265625,
+ "learning_rate": 4.6814891739524195e-06,
+ "loss": 0.4002,
+ "step": 195150
+ },
+ {
+ "epoch": 1.0071148121204616,
+ "grad_norm": 23141.138671875,
+ "learning_rate": 4.666067854730322e-06,
+ "loss": 0.3965,
+ "step": 195200
+ },
+ {
+ "epoch": 1.0073727821030745,
+ "grad_norm": 23506.640625,
+ "learning_rate": 4.650670733883344e-06,
+ "loss": 0.3962,
+ "step": 195250
+ },
+ {
+ "epoch": 1.0076307520856873,
+ "grad_norm": 26591.212890625,
+ "learning_rate": 4.635297819630202e-06,
+ "loss": 0.3992,
+ "step": 195300
+ },
+ {
+ "epoch": 1.0078887220683002,
+ "grad_norm": 22111.640625,
+ "learning_rate": 4.619949120176642e-06,
+ "loss": 0.401,
+ "step": 195350
+ },
+ {
+ "epoch": 1.008146692050913,
+ "grad_norm": 25048.17578125,
+ "learning_rate": 4.604624643715505e-06,
+ "loss": 0.4016,
+ "step": 195400
+ },
+ {
+ "epoch": 1.0084046620335259,
+ "grad_norm": 23263.23828125,
+ "learning_rate": 4.589324398426714e-06,
+ "loss": 0.3942,
+ "step": 195450
+ },
+ {
+ "epoch": 1.0086626320161387,
+ "grad_norm": 23640.9296875,
+ "learning_rate": 4.57404839247722e-06,
+ "loss": 0.4039,
+ "step": 195500
+ },
+ {
+ "epoch": 1.0089206019987513,
+ "grad_norm": 25680.390625,
+ "learning_rate": 4.558796634021079e-06,
+ "loss": 0.3986,
+ "step": 195550
+ },
+ {
+ "epoch": 1.0091785719813642,
+ "grad_norm": 23321.78125,
+ "learning_rate": 4.543569131199382e-06,
+ "loss": 0.4039,
+ "step": 195600
+ },
+ {
+ "epoch": 1.009436541963977,
+ "grad_norm": 24123.205078125,
+ "learning_rate": 4.528365892140263e-06,
+ "loss": 0.397,
+ "step": 195650
+ },
+ {
+ "epoch": 1.0096945119465899,
+ "grad_norm": 23332.673828125,
+ "learning_rate": 4.513186924958928e-06,
+ "loss": 0.3941,
+ "step": 195700
+ },
+ {
+ "epoch": 1.0099524819292027,
+ "grad_norm": 25583.609375,
+ "learning_rate": 4.498032237757605e-06,
+ "loss": 0.4046,
+ "step": 195750
+ },
+ {
+ "epoch": 1.0102104519118156,
+ "grad_norm": 25230.3515625,
+ "learning_rate": 4.482901838625586e-06,
+ "loss": 0.4012,
+ "step": 195800
+ },
+ {
+ "epoch": 1.0104684218944284,
+ "grad_norm": 24376.5859375,
+ "learning_rate": 4.46779573563918e-06,
+ "loss": 0.3911,
+ "step": 195850
+ },
+ {
+ "epoch": 1.0107263918770413,
+ "grad_norm": 23978.17578125,
+ "learning_rate": 4.452713936861724e-06,
+ "loss": 0.4031,
+ "step": 195900
+ },
+ {
+ "epoch": 1.010984361859654,
+ "grad_norm": 23535.03515625,
+ "learning_rate": 4.437656450343602e-06,
+ "loss": 0.3933,
+ "step": 195950
+ },
+ {
+ "epoch": 1.0112423318422668,
+ "grad_norm": 24465.794921875,
+ "learning_rate": 4.422623284122207e-06,
+ "loss": 0.4027,
+ "step": 196000
+ },
+ {
+ "epoch": 1.0115003018248796,
+ "grad_norm": 23942.03125,
+ "learning_rate": 4.407614446221936e-06,
+ "loss": 0.4024,
+ "step": 196050
+ },
+ {
+ "epoch": 1.0117582718074924,
+ "grad_norm": 23610.720703125,
+ "learning_rate": 4.392629944654248e-06,
+ "loss": 0.3982,
+ "step": 196100
+ },
+ {
+ "epoch": 1.0120162417901053,
+ "grad_norm": 25937.53125,
+ "learning_rate": 4.3776697874175375e-06,
+ "loss": 0.3991,
+ "step": 196150
+ },
+ {
+ "epoch": 1.0122742117727181,
+ "grad_norm": 24008.5234375,
+ "learning_rate": 4.362733982497286e-06,
+ "loss": 0.3968,
+ "step": 196200
+ },
+ {
+ "epoch": 1.012532181755331,
+ "grad_norm": 23377.744140625,
+ "learning_rate": 4.347822537865914e-06,
+ "loss": 0.3958,
+ "step": 196250
+ },
+ {
+ "epoch": 1.0127901517379438,
+ "grad_norm": 23768.7421875,
+ "learning_rate": 4.332935461482862e-06,
+ "loss": 0.4004,
+ "step": 196300
+ },
+ {
+ "epoch": 1.0130481217205567,
+ "grad_norm": 25974.603515625,
+ "learning_rate": 4.3180727612945896e-06,
+ "loss": 0.4038,
+ "step": 196350
+ },
+ {
+ "epoch": 1.0133060917031693,
+ "grad_norm": 22376.34765625,
+ "learning_rate": 4.303234445234477e-06,
+ "loss": 0.3991,
+ "step": 196400
+ },
+ {
+ "epoch": 1.0135640616857822,
+ "grad_norm": 22145.03515625,
+ "learning_rate": 4.288420521222963e-06,
+ "loss": 0.3971,
+ "step": 196450
+ },
+ {
+ "epoch": 1.013822031668395,
+ "grad_norm": 21512.77734375,
+ "learning_rate": 4.273630997167422e-06,
+ "loss": 0.399,
+ "step": 196500
+ },
+ {
+ "epoch": 1.0140800016510079,
+ "grad_norm": 22957.626953125,
+ "learning_rate": 4.258865880962215e-06,
+ "loss": 0.3995,
+ "step": 196550
+ },
+ {
+ "epoch": 1.0143379716336207,
+ "grad_norm": 21951.89453125,
+ "learning_rate": 4.244125180488673e-06,
+ "loss": 0.3961,
+ "step": 196600
+ },
+ {
+ "epoch": 1.0145959416162336,
+ "grad_norm": 23440.005859375,
+ "learning_rate": 4.229408903615095e-06,
+ "loss": 0.4057,
+ "step": 196650
+ },
+ {
+ "epoch": 1.0148539115988464,
+ "grad_norm": 23987.21484375,
+ "learning_rate": 4.214717058196754e-06,
+ "loss": 0.3999,
+ "step": 196700
+ },
+ {
+ "epoch": 1.0151118815814593,
+ "grad_norm": 24526.482421875,
+ "learning_rate": 4.200049652075866e-06,
+ "loss": 0.3964,
+ "step": 196750
+ },
+ {
+ "epoch": 1.0153698515640721,
+ "grad_norm": 23351.193359375,
+ "learning_rate": 4.185406693081612e-06,
+ "loss": 0.3978,
+ "step": 196800
+ },
+ {
+ "epoch": 1.0156278215466847,
+ "grad_norm": 25014.873046875,
+ "learning_rate": 4.170788189030106e-06,
+ "loss": 0.3963,
+ "step": 196850
+ },
+ {
+ "epoch": 1.0158857915292976,
+ "grad_norm": 21085.181640625,
+ "learning_rate": 4.156194147724451e-06,
+ "loss": 0.4015,
+ "step": 196900
+ },
+ {
+ "epoch": 1.0161437615119104,
+ "grad_norm": 20203.427734375,
+ "learning_rate": 4.141624576954634e-06,
+ "loss": 0.4037,
+ "step": 196950
+ },
+ {
+ "epoch": 1.0164017314945233,
+ "grad_norm": 23869.416015625,
+ "learning_rate": 4.1270794844976255e-06,
+ "loss": 0.4038,
+ "step": 197000
+ },
+ {
+ "epoch": 1.0166597014771361,
+ "grad_norm": 24936.158203125,
+ "learning_rate": 4.112558878117318e-06,
+ "loss": 0.4073,
+ "step": 197050
+ },
+ {
+ "epoch": 1.016917671459749,
+ "grad_norm": 23021.921875,
+ "learning_rate": 4.098062765564509e-06,
+ "loss": 0.4056,
+ "step": 197100
+ },
+ {
+ "epoch": 1.0171756414423618,
+ "grad_norm": 21626.19921875,
+ "learning_rate": 4.083591154576971e-06,
+ "loss": 0.3989,
+ "step": 197150
+ },
+ {
+ "epoch": 1.0174336114249747,
+ "grad_norm": 25556.169921875,
+ "learning_rate": 4.069144052879342e-06,
+ "loss": 0.3975,
+ "step": 197200
+ },
+ {
+ "epoch": 1.0176915814075873,
+ "grad_norm": 23286.365234375,
+ "learning_rate": 4.054721468183226e-06,
+ "loss": 0.3974,
+ "step": 197250
+ },
+ {
+ "epoch": 1.0179495513902002,
+ "grad_norm": 24497.57421875,
+ "learning_rate": 4.040323408187113e-06,
+ "loss": 0.4028,
+ "step": 197300
+ },
+ {
+ "epoch": 1.018207521372813,
+ "grad_norm": 26279.40625,
+ "learning_rate": 4.025949880576407e-06,
+ "loss": 0.4034,
+ "step": 197350
+ },
+ {
+ "epoch": 1.0184654913554259,
+ "grad_norm": 22679.267578125,
+ "learning_rate": 4.011600893023421e-06,
+ "loss": 0.3991,
+ "step": 197400
+ },
+ {
+ "epoch": 1.0187234613380387,
+ "grad_norm": 25421.83984375,
+ "learning_rate": 3.997276453187365e-06,
+ "loss": 0.4023,
+ "step": 197450
+ },
+ {
+ "epoch": 1.0189814313206516,
+ "grad_norm": 25313.75,
+ "learning_rate": 3.982976568714336e-06,
+ "loss": 0.4018,
+ "step": 197500
+ },
+ {
+ "epoch": 1.0192394013032644,
+ "grad_norm": 24318.505859375,
+ "learning_rate": 3.96870124723736e-06,
+ "loss": 0.4027,
+ "step": 197550
+ },
+ {
+ "epoch": 1.0194973712858773,
+ "grad_norm": 22409.70703125,
+ "learning_rate": 3.9544504963763105e-06,
+ "loss": 0.3982,
+ "step": 197600
+ },
+ {
+ "epoch": 1.01975534126849,
+ "grad_norm": 25028.7265625,
+ "learning_rate": 3.9402243237379675e-06,
+ "loss": 0.4037,
+ "step": 197650
+ },
+ {
+ "epoch": 1.0200133112511027,
+ "grad_norm": 21235.19140625,
+ "learning_rate": 3.926022736915985e-06,
+ "loss": 0.3972,
+ "step": 197700
+ },
+ {
+ "epoch": 1.0202712812337156,
+ "grad_norm": 24214.41015625,
+ "learning_rate": 3.911845743490889e-06,
+ "loss": 0.3984,
+ "step": 197750
+ },
+ {
+ "epoch": 1.0205292512163284,
+ "grad_norm": 24445.375,
+ "learning_rate": 3.897693351030102e-06,
+ "loss": 0.4025,
+ "step": 197800
+ },
+ {
+ "epoch": 1.0207872211989413,
+ "grad_norm": 25233.3515625,
+ "learning_rate": 3.883565567087871e-06,
+ "loss": 0.3993,
+ "step": 197850
+ },
+ {
+ "epoch": 1.0210451911815541,
+ "grad_norm": 23982.43359375,
+ "learning_rate": 3.8694623992053534e-06,
+ "loss": 0.4023,
+ "step": 197900
+ },
+ {
+ "epoch": 1.021303161164167,
+ "grad_norm": 28533.689453125,
+ "learning_rate": 3.855383854910549e-06,
+ "loss": 0.3917,
+ "step": 197950
+ },
+ {
+ "epoch": 1.0215611311467798,
+ "grad_norm": 26334.77734375,
+ "learning_rate": 3.841329941718286e-06,
+ "loss": 0.3989,
+ "step": 198000
+ },
+ {
+ "epoch": 1.0218191011293927,
+ "grad_norm": 24765.802734375,
+ "learning_rate": 3.827300667130312e-06,
+ "loss": 0.398,
+ "step": 198050
+ },
+ {
+ "epoch": 1.0220770711120055,
+ "grad_norm": 25089.34765625,
+ "learning_rate": 3.8132960386351445e-06,
+ "loss": 0.4049,
+ "step": 198100
+ },
+ {
+ "epoch": 1.0223350410946181,
+ "grad_norm": 23840.72265625,
+ "learning_rate": 3.7993160637082027e-06,
+ "loss": 0.3998,
+ "step": 198150
+ },
+ {
+ "epoch": 1.022593011077231,
+ "grad_norm": 21590.1328125,
+ "learning_rate": 3.7853607498117282e-06,
+ "loss": 0.404,
+ "step": 198200
+ },
+ {
+ "epoch": 1.0228509810598438,
+ "grad_norm": 24620.478515625,
+ "learning_rate": 3.7714301043947855e-06,
+ "loss": 0.3958,
+ "step": 198250
+ },
+ {
+ "epoch": 1.0231089510424567,
+ "grad_norm": 22476.82421875,
+ "learning_rate": 3.757524134893292e-06,
+ "loss": 0.3993,
+ "step": 198300
+ },
+ {
+ "epoch": 1.0233669210250695,
+ "grad_norm": 22550.45703125,
+ "learning_rate": 3.7436428487299836e-06,
+ "loss": 0.3983,
+ "step": 198350
+ },
+ {
+ "epoch": 1.0236248910076824,
+ "grad_norm": 23764.958984375,
+ "learning_rate": 3.7297862533144045e-06,
+ "loss": 0.4005,
+ "step": 198400
+ },
+ {
+ "epoch": 1.0238828609902952,
+ "grad_norm": 23600.103515625,
+ "learning_rate": 3.7159543560429667e-06,
+ "loss": 0.3976,
+ "step": 198450
+ },
+ {
+ "epoch": 1.024140830972908,
+ "grad_norm": 24258.537109375,
+ "learning_rate": 3.7021471642988583e-06,
+ "loss": 0.4015,
+ "step": 198500
+ },
+ {
+ "epoch": 1.0243988009555207,
+ "grad_norm": 22559.609375,
+ "learning_rate": 3.6883646854520837e-06,
+ "loss": 0.4028,
+ "step": 198550
+ },
+ {
+ "epoch": 1.0246567709381336,
+ "grad_norm": 20827.234375,
+ "learning_rate": 3.67460692685947e-06,
+ "loss": 0.3954,
+ "step": 198600
+ },
+ {
+ "epoch": 1.0249147409207464,
+ "grad_norm": 24864.171875,
+ "learning_rate": 3.6608738958646303e-06,
+ "loss": 0.3919,
+ "step": 198650
+ },
+ {
+ "epoch": 1.0251727109033593,
+ "grad_norm": 25603.6796875,
+ "learning_rate": 3.647165599798019e-06,
+ "loss": 0.3984,
+ "step": 198700
+ },
+ {
+ "epoch": 1.0254306808859721,
+ "grad_norm": 21448.0234375,
+ "learning_rate": 3.6334820459768217e-06,
+ "loss": 0.4031,
+ "step": 198750
+ },
+ {
+ "epoch": 1.025688650868585,
+ "grad_norm": 24923.51953125,
+ "learning_rate": 3.6198232417050782e-06,
+ "loss": 0.4023,
+ "step": 198800
+ },
+ {
+ "epoch": 1.0259466208511978,
+ "grad_norm": 21672.09765625,
+ "learning_rate": 3.6061891942735957e-06,
+ "loss": 0.4027,
+ "step": 198850
+ },
+ {
+ "epoch": 1.0262045908338107,
+ "grad_norm": 24733.31640625,
+ "learning_rate": 3.5925799109599423e-06,
+ "loss": 0.401,
+ "step": 198900
+ },
+ {
+ "epoch": 1.0264625608164235,
+ "grad_norm": 25941.05859375,
+ "learning_rate": 3.5789953990285284e-06,
+ "loss": 0.3944,
+ "step": 198950
+ },
+ {
+ "epoch": 1.0267205307990361,
+ "grad_norm": 25462.96875,
+ "learning_rate": 3.56543566573046e-06,
+ "loss": 0.4021,
+ "step": 199000
+ },
+ {
+ "epoch": 1.026978500781649,
+ "grad_norm": 24243.462890625,
+ "learning_rate": 3.5519007183036856e-06,
+ "loss": 0.4009,
+ "step": 199050
+ },
+ {
+ "epoch": 1.0272364707642618,
+ "grad_norm": 22507.208984375,
+ "learning_rate": 3.5383905639728987e-06,
+ "loss": 0.3968,
+ "step": 199100
+ },
+ {
+ "epoch": 1.0274944407468747,
+ "grad_norm": 22496.060546875,
+ "learning_rate": 3.524905209949553e-06,
+ "loss": 0.3988,
+ "step": 199150
+ },
+ {
+ "epoch": 1.0277524107294875,
+ "grad_norm": 22755.974609375,
+ "learning_rate": 3.511444663431862e-06,
+ "loss": 0.3944,
+ "step": 199200
+ },
+ {
+ "epoch": 1.0280103807121004,
+ "grad_norm": 24945.93359375,
+ "learning_rate": 3.498008931604818e-06,
+ "loss": 0.4015,
+ "step": 199250
+ },
+ {
+ "epoch": 1.0282683506947132,
+ "grad_norm": 23216.15625,
+ "learning_rate": 3.484598021640134e-06,
+ "loss": 0.3982,
+ "step": 199300
+ },
+ {
+ "epoch": 1.028526320677326,
+ "grad_norm": 24690.8203125,
+ "learning_rate": 3.4712119406963174e-06,
+ "loss": 0.4,
+ "step": 199350
+ },
+ {
+ "epoch": 1.0287842906599387,
+ "grad_norm": 23324.27734375,
+ "learning_rate": 3.4578506959185907e-06,
+ "loss": 0.4005,
+ "step": 199400
+ },
+ {
+ "epoch": 1.0290422606425516,
+ "grad_norm": 22831.544921875,
+ "learning_rate": 3.444514294438922e-06,
+ "loss": 0.3987,
+ "step": 199450
+ },
+ {
+ "epoch": 1.0293002306251644,
+ "grad_norm": 22126.681640625,
+ "learning_rate": 3.4312027433760383e-06,
+ "loss": 0.4044,
+ "step": 199500
+ },
+ {
+ "epoch": 1.0295582006077773,
+ "grad_norm": 22105.94140625,
+ "learning_rate": 3.417916049835368e-06,
+ "loss": 0.4023,
+ "step": 199550
+ },
+ {
+ "epoch": 1.02981617059039,
+ "grad_norm": 24164.646484375,
+ "learning_rate": 3.4046542209091037e-06,
+ "loss": 0.3968,
+ "step": 199600
+ },
+ {
+ "epoch": 1.030074140573003,
+ "grad_norm": 23752.33203125,
+ "learning_rate": 3.3914172636761554e-06,
+ "loss": 0.3974,
+ "step": 199650
+ },
+ {
+ "epoch": 1.0303321105556158,
+ "grad_norm": 21793.787109375,
+ "learning_rate": 3.3782051852021433e-06,
+ "loss": 0.3981,
+ "step": 199700
+ },
+ {
+ "epoch": 1.0305900805382286,
+ "grad_norm": 26727.91796875,
+ "learning_rate": 3.365017992539432e-06,
+ "loss": 0.4025,
+ "step": 199750
+ },
+ {
+ "epoch": 1.0308480505208415,
+ "grad_norm": 21089.958984375,
+ "learning_rate": 3.3518556927270683e-06,
+ "loss": 0.4001,
+ "step": 199800
+ },
+ {
+ "epoch": 1.0311060205034541,
+ "grad_norm": 23690.0390625,
+ "learning_rate": 3.33871829279086e-06,
+ "loss": 0.3956,
+ "step": 199850
+ },
+ {
+ "epoch": 1.031363990486067,
+ "grad_norm": 24266.84375,
+ "learning_rate": 3.325605799743281e-06,
+ "loss": 0.3966,
+ "step": 199900
+ },
+ {
+ "epoch": 1.0316219604686798,
+ "grad_norm": 22199.455078125,
+ "learning_rate": 3.312518220583527e-06,
+ "loss": 0.4058,
+ "step": 199950
+ },
+ {
+ "epoch": 1.0318799304512927,
+ "grad_norm": 21272.033203125,
+ "learning_rate": 3.299455562297504e-06,
+ "loss": 0.3969,
+ "step": 200000
+ },
+ {
+ "epoch": 1.0318799304512927,
+ "eval_loss": 0.38684460520744324,
+ "eval_runtime": 3230.0057,
+ "eval_samples_per_second": 960.097,
+ "eval_steps_per_second": 1.875,
+ "step": 200000
+ },
+ {
+ "epoch": 1.0321379004339055,
+ "grad_norm": 23089.7578125,
+ "learning_rate": 3.286417831857791e-06,
+ "loss": 0.4011,
+ "step": 200050
+ },
+ {
+ "epoch": 1.0323958704165184,
+ "grad_norm": 27875.5859375,
+ "learning_rate": 3.2734050362236814e-06,
+ "loss": 0.4014,
+ "step": 200100
+ },
+ {
+ "epoch": 1.0326538403991312,
+ "grad_norm": 22023.40234375,
+ "learning_rate": 3.260417182341169e-06,
+ "loss": 0.398,
+ "step": 200150
+ },
+ {
+ "epoch": 1.032911810381744,
+ "grad_norm": 23899.208984375,
+ "learning_rate": 3.247454277142892e-06,
+ "loss": 0.3976,
+ "step": 200200
+ },
+ {
+ "epoch": 1.0331697803643567,
+ "grad_norm": 22874.44921875,
+ "learning_rate": 3.2345163275482147e-06,
+ "loss": 0.4014,
+ "step": 200250
+ },
+ {
+ "epoch": 1.0334277503469695,
+ "grad_norm": 21650.296875,
+ "learning_rate": 3.221603340463164e-06,
+ "loss": 0.4012,
+ "step": 200300
+ },
+ {
+ "epoch": 1.0336857203295824,
+ "grad_norm": 24189.89453125,
+ "learning_rate": 3.2087153227804314e-06,
+ "loss": 0.401,
+ "step": 200350
+ },
+ {
+ "epoch": 1.0339436903121952,
+ "grad_norm": 21525.12109375,
+ "learning_rate": 3.1958522813794134e-06,
+ "loss": 0.4016,
+ "step": 200400
+ },
+ {
+ "epoch": 1.034201660294808,
+ "grad_norm": 23732.640625,
+ "learning_rate": 3.1830142231261294e-06,
+ "loss": 0.4021,
+ "step": 200450
+ },
+ {
+ "epoch": 1.034459630277421,
+ "grad_norm": 24911.607421875,
+ "learning_rate": 3.170201154873298e-06,
+ "loss": 0.3943,
+ "step": 200500
+ },
+ {
+ "epoch": 1.0347176002600338,
+ "grad_norm": 25295.861328125,
+ "learning_rate": 3.1574130834602813e-06,
+ "loss": 0.401,
+ "step": 200550
+ },
+ {
+ "epoch": 1.0349755702426466,
+ "grad_norm": 23536.498046875,
+ "learning_rate": 3.1446500157131075e-06,
+ "loss": 0.3964,
+ "step": 200600
+ },
+ {
+ "epoch": 1.0352335402252595,
+ "grad_norm": 26484.287109375,
+ "learning_rate": 3.131911958444461e-06,
+ "loss": 0.4068,
+ "step": 200650
+ },
+ {
+ "epoch": 1.0354915102078721,
+ "grad_norm": 24330.001953125,
+ "learning_rate": 3.1191989184536474e-06,
+ "loss": 0.3911,
+ "step": 200700
+ },
+ {
+ "epoch": 1.035749480190485,
+ "grad_norm": 21095.994140625,
+ "learning_rate": 3.1065109025266713e-06,
+ "loss": 0.4,
+ "step": 200750
+ },
+ {
+ "epoch": 1.0360074501730978,
+ "grad_norm": 21829.64453125,
+ "learning_rate": 3.093847917436132e-06,
+ "loss": 0.4016,
+ "step": 200800
+ },
+ {
+ "epoch": 1.0362654201557107,
+ "grad_norm": 25772.79296875,
+ "learning_rate": 3.0812099699412953e-06,
+ "loss": 0.4032,
+ "step": 200850
+ },
+ {
+ "epoch": 1.0365233901383235,
+ "grad_norm": 25614.240234375,
+ "learning_rate": 3.0685970667880425e-06,
+ "loss": 0.3976,
+ "step": 200900
+ },
+ {
+ "epoch": 1.0367813601209364,
+ "grad_norm": 26170.455078125,
+ "learning_rate": 3.056009214708905e-06,
+ "loss": 0.4001,
+ "step": 200950
+ },
+ {
+ "epoch": 1.0370393301035492,
+ "grad_norm": 24801.76171875,
+ "learning_rate": 3.0434464204230186e-06,
+ "loss": 0.3924,
+ "step": 201000
+ },
+ {
+ "epoch": 1.037297300086162,
+ "grad_norm": 28940.640625,
+ "learning_rate": 3.0309086906361917e-06,
+ "loss": 0.3998,
+ "step": 201050
+ },
+ {
+ "epoch": 1.037555270068775,
+ "grad_norm": 23856.90625,
+ "learning_rate": 3.018396032040788e-06,
+ "loss": 0.397,
+ "step": 201100
+ },
+ {
+ "epoch": 1.0378132400513875,
+ "grad_norm": 23309.861328125,
+ "learning_rate": 3.005908451315842e-06,
+ "loss": 0.4026,
+ "step": 201150
+ },
+ {
+ "epoch": 1.0380712100340004,
+ "grad_norm": 23592.7265625,
+ "learning_rate": 2.993445955126978e-06,
+ "loss": 0.3971,
+ "step": 201200
+ },
+ {
+ "epoch": 1.0383291800166132,
+ "grad_norm": 23301.861328125,
+ "learning_rate": 2.9810085501264296e-06,
+ "loss": 0.403,
+ "step": 201250
+ },
+ {
+ "epoch": 1.038587149999226,
+ "grad_norm": 23200.0859375,
+ "learning_rate": 2.968596242953059e-06,
+ "loss": 0.4001,
+ "step": 201300
+ },
+ {
+ "epoch": 1.038845119981839,
+ "grad_norm": 26894.70703125,
+ "learning_rate": 2.956209040232294e-06,
+ "loss": 0.3988,
+ "step": 201350
+ },
+ {
+ "epoch": 1.0391030899644518,
+ "grad_norm": 22423.931640625,
+ "learning_rate": 2.9438469485761956e-06,
+ "loss": 0.3981,
+ "step": 201400
+ },
+ {
+ "epoch": 1.0393610599470646,
+ "grad_norm": 24167.068359375,
+ "learning_rate": 2.9315099745834073e-06,
+ "loss": 0.4024,
+ "step": 201450
+ },
+ {
+ "epoch": 1.0396190299296775,
+ "grad_norm": 25832.712890625,
+ "learning_rate": 2.9191981248391677e-06,
+ "loss": 0.3937,
+ "step": 201500
+ },
+ {
+ "epoch": 1.03987699991229,
+ "grad_norm": 26923.005859375,
+ "learning_rate": 2.9069114059153024e-06,
+ "loss": 0.3922,
+ "step": 201550
+ },
+ {
+ "epoch": 1.040134969894903,
+ "grad_norm": 23295.380859375,
+ "learning_rate": 2.8946498243702158e-06,
+ "loss": 0.4011,
+ "step": 201600
+ },
+ {
+ "epoch": 1.0403929398775158,
+ "grad_norm": 23378.5234375,
+ "learning_rate": 2.882413386748922e-06,
+ "loss": 0.4033,
+ "step": 201650
+ },
+ {
+ "epoch": 1.0406509098601286,
+ "grad_norm": 24349.9140625,
+ "learning_rate": 2.8702020995829803e-06,
+ "loss": 0.3964,
+ "step": 201700
+ },
+ {
+ "epoch": 1.0409088798427415,
+ "grad_norm": 24178.61328125,
+ "learning_rate": 2.8580159693905485e-06,
+ "loss": 0.3978,
+ "step": 201750
+ },
+ {
+ "epoch": 1.0411668498253543,
+ "grad_norm": 24998.189453125,
+ "learning_rate": 2.8458550026763344e-06,
+ "loss": 0.3943,
+ "step": 201800
+ },
+ {
+ "epoch": 1.0414248198079672,
+ "grad_norm": 28928.828125,
+ "learning_rate": 2.8337192059316344e-06,
+ "loss": 0.3998,
+ "step": 201850
+ },
+ {
+ "epoch": 1.04168278979058,
+ "grad_norm": 24329.37890625,
+ "learning_rate": 2.8216085856342946e-06,
+ "loss": 0.3976,
+ "step": 201900
+ },
+ {
+ "epoch": 1.041940759773193,
+ "grad_norm": 24121.482421875,
+ "learning_rate": 2.809523148248744e-06,
+ "loss": 0.3952,
+ "step": 201950
+ },
+ {
+ "epoch": 1.0421987297558055,
+ "grad_norm": 23812.671875,
+ "learning_rate": 2.7974629002259443e-06,
+ "loss": 0.4052,
+ "step": 202000
+ },
+ {
+ "epoch": 1.0424566997384184,
+ "grad_norm": 25162.40234375,
+ "learning_rate": 2.785427848003419e-06,
+ "loss": 0.3948,
+ "step": 202050
+ },
+ {
+ "epoch": 1.0427146697210312,
+ "grad_norm": 23631.462890625,
+ "learning_rate": 2.773417998005262e-06,
+ "loss": 0.3982,
+ "step": 202100
+ },
+ {
+ "epoch": 1.042972639703644,
+ "grad_norm": 24178.177734375,
+ "learning_rate": 2.761433356642079e-06,
+ "loss": 0.4012,
+ "step": 202150
+ },
+ {
+ "epoch": 1.043230609686257,
+ "grad_norm": 24726.37890625,
+ "learning_rate": 2.7494739303110527e-06,
+ "loss": 0.3926,
+ "step": 202200
+ },
+ {
+ "epoch": 1.0434885796688698,
+ "grad_norm": 23798.73828125,
+ "learning_rate": 2.7375397253958935e-06,
+ "loss": 0.3998,
+ "step": 202250
+ },
+ {
+ "epoch": 1.0437465496514826,
+ "grad_norm": 25162.677734375,
+ "learning_rate": 2.725630748266844e-06,
+ "loss": 0.4038,
+ "step": 202300
+ },
+ {
+ "epoch": 1.0440045196340955,
+ "grad_norm": 28668.78515625,
+ "learning_rate": 2.7137470052806814e-06,
+ "loss": 0.3989,
+ "step": 202350
+ },
+ {
+ "epoch": 1.0442624896167083,
+ "grad_norm": 22550.810546875,
+ "learning_rate": 2.7018885027807195e-06,
+ "loss": 0.3994,
+ "step": 202400
+ },
+ {
+ "epoch": 1.044520459599321,
+ "grad_norm": 26758.71484375,
+ "learning_rate": 2.6900552470968064e-06,
+ "loss": 0.4063,
+ "step": 202450
+ },
+ {
+ "epoch": 1.0447784295819338,
+ "grad_norm": 24895.77734375,
+ "learning_rate": 2.678247244545301e-06,
+ "loss": 0.3968,
+ "step": 202500
+ },
+ {
+ "epoch": 1.0450363995645466,
+ "grad_norm": 22442.416015625,
+ "learning_rate": 2.6664645014290833e-06,
+ "loss": 0.4009,
+ "step": 202550
+ },
+ {
+ "epoch": 1.0452943695471595,
+ "grad_norm": 24647.232421875,
+ "learning_rate": 2.654707024037556e-06,
+ "loss": 0.3984,
+ "step": 202600
+ },
+ {
+ "epoch": 1.0455523395297723,
+ "grad_norm": 24156.189453125,
+ "learning_rate": 2.6429748186466265e-06,
+ "loss": 0.3983,
+ "step": 202650
+ },
+ {
+ "epoch": 1.0458103095123852,
+ "grad_norm": 24131.658203125,
+ "learning_rate": 2.6312678915187185e-06,
+ "loss": 0.3941,
+ "step": 202700
+ },
+ {
+ "epoch": 1.046068279494998,
+ "grad_norm": 24890.5625,
+ "learning_rate": 2.6195862489027833e-06,
+ "loss": 0.3936,
+ "step": 202750
+ },
+ {
+ "epoch": 1.0463262494776109,
+ "grad_norm": 26486.58203125,
+ "learning_rate": 2.607929897034228e-06,
+ "loss": 0.4073,
+ "step": 202800
+ },
+ {
+ "epoch": 1.0465842194602235,
+ "grad_norm": 24554.09375,
+ "learning_rate": 2.5962988421350033e-06,
+ "loss": 0.3985,
+ "step": 202850
+ },
+ {
+ "epoch": 1.0468421894428364,
+ "grad_norm": 24964.349609375,
+ "learning_rate": 2.584693090413537e-06,
+ "loss": 0.3974,
+ "step": 202900
+ },
+ {
+ "epoch": 1.0471001594254492,
+ "grad_norm": 21256.87890625,
+ "learning_rate": 2.5731126480647516e-06,
+ "loss": 0.3969,
+ "step": 202950
+ },
+ {
+ "epoch": 1.047358129408062,
+ "grad_norm": 23721.197265625,
+ "learning_rate": 2.5615575212700804e-06,
+ "loss": 0.4039,
+ "step": 203000
+ },
+ {
+ "epoch": 1.047616099390675,
+ "grad_norm": 25096.4609375,
+ "learning_rate": 2.550027716197395e-06,
+ "loss": 0.3953,
+ "step": 203050
+ },
+ {
+ "epoch": 1.0478740693732878,
+ "grad_norm": 22199.11328125,
+ "learning_rate": 2.5385232390011114e-06,
+ "loss": 0.3979,
+ "step": 203100
+ },
+ {
+ "epoch": 1.0481320393559006,
+ "grad_norm": 24967.4609375,
+ "learning_rate": 2.527044095822084e-06,
+ "loss": 0.4023,
+ "step": 203150
+ },
+ {
+ "epoch": 1.0483900093385135,
+ "grad_norm": 28301.302734375,
+ "learning_rate": 2.5155902927876564e-06,
+ "loss": 0.4047,
+ "step": 203200
+ },
+ {
+ "epoch": 1.0486479793211263,
+ "grad_norm": 22268.037109375,
+ "learning_rate": 2.504161836011648e-06,
+ "loss": 0.4032,
+ "step": 203250
+ },
+ {
+ "epoch": 1.048905949303739,
+ "grad_norm": 28254.658203125,
+ "learning_rate": 2.4927587315943414e-06,
+ "loss": 0.3915,
+ "step": 203300
+ },
+ {
+ "epoch": 1.0491639192863518,
+ "grad_norm": 24471.462890625,
+ "learning_rate": 2.4813809856225112e-06,
+ "loss": 0.3986,
+ "step": 203350
+ },
+ {
+ "epoch": 1.0494218892689646,
+ "grad_norm": 24208.7578125,
+ "learning_rate": 2.470028604169361e-06,
+ "loss": 0.3969,
+ "step": 203400
+ },
+ {
+ "epoch": 1.0496798592515775,
+ "grad_norm": 23962.025390625,
+ "learning_rate": 2.4587015932945824e-06,
+ "loss": 0.3992,
+ "step": 203450
+ },
+ {
+ "epoch": 1.0499378292341903,
+ "grad_norm": 24777.421875,
+ "learning_rate": 2.4473999590443054e-06,
+ "loss": 0.4042,
+ "step": 203500
+ },
+ {
+ "epoch": 1.0501957992168032,
+ "grad_norm": 26705.40234375,
+ "learning_rate": 2.4361237074511323e-06,
+ "loss": 0.3985,
+ "step": 203550
+ },
+ {
+ "epoch": 1.050453769199416,
+ "grad_norm": 22508.51171875,
+ "learning_rate": 2.424872844534093e-06,
+ "loss": 0.3967,
+ "step": 203600
+ },
+ {
+ "epoch": 1.0507117391820289,
+ "grad_norm": 24678.62109375,
+ "learning_rate": 2.4136473762987057e-06,
+ "loss": 0.4002,
+ "step": 203650
+ },
+ {
+ "epoch": 1.0509697091646415,
+ "grad_norm": 24190.259765625,
+ "learning_rate": 2.402447308736883e-06,
+ "loss": 0.4002,
+ "step": 203700
+ },
+ {
+ "epoch": 1.0512276791472543,
+ "grad_norm": 27986.912109375,
+ "learning_rate": 2.391272647827014e-06,
+ "loss": 0.406,
+ "step": 203750
+ },
+ {
+ "epoch": 1.0514856491298672,
+ "grad_norm": 23664.740234375,
+ "learning_rate": 2.3801233995339236e-06,
+ "loss": 0.3988,
+ "step": 203800
+ },
+ {
+ "epoch": 1.05174361911248,
+ "grad_norm": 32503.17578125,
+ "learning_rate": 2.368999569808844e-06,
+ "loss": 0.3996,
+ "step": 203850
+ },
+ {
+ "epoch": 1.052001589095093,
+ "grad_norm": 24140.591796875,
+ "learning_rate": 2.3579011645894933e-06,
+ "loss": 0.4021,
+ "step": 203900
+ },
+ {
+ "epoch": 1.0522595590777057,
+ "grad_norm": 24920.033203125,
+ "learning_rate": 2.3468281897999487e-06,
+ "loss": 0.4038,
+ "step": 203950
+ },
+ {
+ "epoch": 1.0525175290603186,
+ "grad_norm": 20836.1796875,
+ "learning_rate": 2.335780651350772e-06,
+ "loss": 0.3929,
+ "step": 204000
+ },
+ {
+ "epoch": 1.0527754990429314,
+ "grad_norm": 22305.021484375,
+ "learning_rate": 2.324758555138923e-06,
+ "loss": 0.3963,
+ "step": 204050
+ },
+ {
+ "epoch": 1.0530334690255443,
+ "grad_norm": 22536.13671875,
+ "learning_rate": 2.3137619070477788e-06,
+ "loss": 0.3923,
+ "step": 204100
+ },
+ {
+ "epoch": 1.053291439008157,
+ "grad_norm": 23319.326171875,
+ "learning_rate": 2.3027907129471395e-06,
+ "loss": 0.4034,
+ "step": 204150
+ },
+ {
+ "epoch": 1.0535494089907698,
+ "grad_norm": 25774.677734375,
+ "learning_rate": 2.2918449786932085e-06,
+ "loss": 0.4015,
+ "step": 204200
+ },
+ {
+ "epoch": 1.0538073789733826,
+ "grad_norm": 23130.119140625,
+ "learning_rate": 2.280924710128618e-06,
+ "loss": 0.3971,
+ "step": 204250
+ },
+ {
+ "epoch": 1.0540653489559955,
+ "grad_norm": 23122.1875,
+ "learning_rate": 2.270029913082394e-06,
+ "loss": 0.3969,
+ "step": 204300
+ },
+ {
+ "epoch": 1.0543233189386083,
+ "grad_norm": 21518.763671875,
+ "learning_rate": 2.2591605933699632e-06,
+ "loss": 0.3992,
+ "step": 204350
+ },
+ {
+ "epoch": 1.0545812889212212,
+ "grad_norm": 25077.322265625,
+ "learning_rate": 2.248316756793156e-06,
+ "loss": 0.405,
+ "step": 204400
+ },
+ {
+ "epoch": 1.054839258903834,
+ "grad_norm": 23907.869140625,
+ "learning_rate": 2.237498409140215e-06,
+ "loss": 0.4009,
+ "step": 204450
+ },
+ {
+ "epoch": 1.0550972288864469,
+ "grad_norm": 22796.865234375,
+ "learning_rate": 2.2267055561857484e-06,
+ "loss": 0.4044,
+ "step": 204500
+ },
+ {
+ "epoch": 1.0553551988690595,
+ "grad_norm": 33471.05859375,
+ "learning_rate": 2.2159382036907927e-06,
+ "loss": 0.4021,
+ "step": 204550
+ },
+ {
+ "epoch": 1.0556131688516723,
+ "grad_norm": 23975.6640625,
+ "learning_rate": 2.2051963574027225e-06,
+ "loss": 0.3922,
+ "step": 204600
+ },
+ {
+ "epoch": 1.0558711388342852,
+ "grad_norm": 24563.220703125,
+ "learning_rate": 2.194480023055351e-06,
+ "loss": 0.3952,
+ "step": 204650
+ },
+ {
+ "epoch": 1.056129108816898,
+ "grad_norm": 24479.20703125,
+ "learning_rate": 2.1837892063688525e-06,
+ "loss": 0.4005,
+ "step": 204700
+ },
+ {
+ "epoch": 1.0563870787995109,
+ "grad_norm": 24895.6640625,
+ "learning_rate": 2.173123913049757e-06,
+ "loss": 0.3985,
+ "step": 204750
+ },
+ {
+ "epoch": 1.0566450487821237,
+ "grad_norm": 25606.34765625,
+ "learning_rate": 2.1624841487910052e-06,
+ "loss": 0.4019,
+ "step": 204800
+ },
+ {
+ "epoch": 1.0569030187647366,
+ "grad_norm": 23026.8828125,
+ "learning_rate": 2.151869919271904e-06,
+ "loss": 0.4023,
+ "step": 204850
+ },
+ {
+ "epoch": 1.0571609887473494,
+ "grad_norm": 24365.9609375,
+ "learning_rate": 2.1412812301581097e-06,
+ "loss": 0.3992,
+ "step": 204900
+ },
+ {
+ "epoch": 1.0574189587299623,
+ "grad_norm": 25374.990234375,
+ "learning_rate": 2.130718087101663e-06,
+ "loss": 0.4009,
+ "step": 204950
+ },
+ {
+ "epoch": 1.057676928712575,
+ "grad_norm": 23697.388671875,
+ "learning_rate": 2.1201804957409697e-06,
+ "loss": 0.4042,
+ "step": 205000
+ },
+ {
+ "epoch": 1.057676928712575,
+ "eval_loss": 0.386392205953598,
+ "eval_runtime": 3213.2768,
+ "eval_samples_per_second": 965.096,
+ "eval_steps_per_second": 1.885,
+ "step": 205000
+ },
+ {
+ "epoch": 1.0579348986951878,
+ "grad_norm": 23768.669921875,
+ "learning_rate": 2.109668461700781e-06,
+ "loss": 0.4058,
+ "step": 205050
+ },
+ {
+ "epoch": 1.0581928686778006,
+ "grad_norm": 24203.693359375,
+ "learning_rate": 2.099181990592236e-06,
+ "loss": 0.3971,
+ "step": 205100
+ },
+ {
+ "epoch": 1.0584508386604135,
+ "grad_norm": 26739.72265625,
+ "learning_rate": 2.088721088012796e-06,
+ "loss": 0.4008,
+ "step": 205150
+ },
+ {
+ "epoch": 1.0587088086430263,
+ "grad_norm": 25664.5,
+ "learning_rate": 2.078285759546289e-06,
+ "loss": 0.4001,
+ "step": 205200
+ },
+ {
+ "epoch": 1.0589667786256391,
+ "grad_norm": 22887.986328125,
+ "learning_rate": 2.067876010762898e-06,
+ "loss": 0.3946,
+ "step": 205250
+ },
+ {
+ "epoch": 1.059224748608252,
+ "grad_norm": 24908.890625,
+ "learning_rate": 2.057491847219134e-06,
+ "loss": 0.3997,
+ "step": 205300
+ },
+ {
+ "epoch": 1.0594827185908648,
+ "grad_norm": 26352.986328125,
+ "learning_rate": 2.0471332744578853e-06,
+ "loss": 0.4022,
+ "step": 205350
+ },
+ {
+ "epoch": 1.0597406885734777,
+ "grad_norm": 23545.6640625,
+ "learning_rate": 2.0368002980083235e-06,
+ "loss": 0.3976,
+ "step": 205400
+ },
+ {
+ "epoch": 1.0599986585560903,
+ "grad_norm": 24206.896484375,
+ "learning_rate": 2.02649292338602e-06,
+ "loss": 0.3934,
+ "step": 205450
+ },
+ {
+ "epoch": 1.0602566285387032,
+ "grad_norm": 22331.580078125,
+ "learning_rate": 2.0162111560928345e-06,
+ "loss": 0.3969,
+ "step": 205500
+ },
+ {
+ "epoch": 1.060514598521316,
+ "grad_norm": 24358.099609375,
+ "learning_rate": 2.0059550016169827e-06,
+ "loss": 0.3934,
+ "step": 205550
+ },
+ {
+ "epoch": 1.0607725685039289,
+ "grad_norm": 23970.693359375,
+ "learning_rate": 1.9957244654330133e-06,
+ "loss": 0.4012,
+ "step": 205600
+ },
+ {
+ "epoch": 1.0610305384865417,
+ "grad_norm": 23980.03125,
+ "learning_rate": 1.985519553001758e-06,
+ "loss": 0.3979,
+ "step": 205650
+ },
+ {
+ "epoch": 1.0612885084691546,
+ "grad_norm": 25418.708984375,
+ "learning_rate": 1.9753402697704313e-06,
+ "loss": 0.3988,
+ "step": 205700
+ },
+ {
+ "epoch": 1.0615464784517674,
+ "grad_norm": 22902.38671875,
+ "learning_rate": 1.965186621172521e-06,
+ "loss": 0.393,
+ "step": 205750
+ },
+ {
+ "epoch": 1.0618044484343803,
+ "grad_norm": 24547.4375,
+ "learning_rate": 1.9550586126278525e-06,
+ "loss": 0.4,
+ "step": 205800
+ },
+ {
+ "epoch": 1.062062418416993,
+ "grad_norm": 24038.619140625,
+ "learning_rate": 1.9449562495425623e-06,
+ "loss": 0.3995,
+ "step": 205850
+ },
+ {
+ "epoch": 1.0623203883996057,
+ "grad_norm": 22873.3515625,
+ "learning_rate": 1.9348795373090977e-06,
+ "loss": 0.4028,
+ "step": 205900
+ },
+ {
+ "epoch": 1.0625783583822186,
+ "grad_norm": 22140.7890625,
+ "learning_rate": 1.9248284813061957e-06,
+ "loss": 0.4036,
+ "step": 205950
+ },
+ {
+ "epoch": 1.0628363283648314,
+ "grad_norm": 23617.9140625,
+ "learning_rate": 1.914803086898942e-06,
+ "loss": 0.4005,
+ "step": 206000
+ },
+ {
+ "epoch": 1.0630942983474443,
+ "grad_norm": 22808.267578125,
+ "learning_rate": 1.9048033594386838e-06,
+ "loss": 0.3989,
+ "step": 206050
+ },
+ {
+ "epoch": 1.0633522683300571,
+ "grad_norm": 23189.298828125,
+ "learning_rate": 1.8948293042630794e-06,
+ "loss": 0.3982,
+ "step": 206100
+ },
+ {
+ "epoch": 1.06361023831267,
+ "grad_norm": 23994.052734375,
+ "learning_rate": 1.884880926696092e-06,
+ "loss": 0.4023,
+ "step": 206150
+ },
+ {
+ "epoch": 1.0638682082952828,
+ "grad_norm": 25587.49609375,
+ "learning_rate": 1.8749582320479687e-06,
+ "loss": 0.4056,
+ "step": 206200
+ },
+ {
+ "epoch": 1.0641261782778957,
+ "grad_norm": 22929.3984375,
+ "learning_rate": 1.865061225615261e-06,
+ "loss": 0.3987,
+ "step": 206250
+ },
+ {
+ "epoch": 1.0643841482605083,
+ "grad_norm": 24747.65234375,
+ "learning_rate": 1.8551899126807825e-06,
+ "loss": 0.3959,
+ "step": 206300
+ },
+ {
+ "epoch": 1.0646421182431212,
+ "grad_norm": 24856.77734375,
+ "learning_rate": 1.8453442985136682e-06,
+ "loss": 0.3989,
+ "step": 206350
+ },
+ {
+ "epoch": 1.064900088225734,
+ "grad_norm": 28136.97265625,
+ "learning_rate": 1.835524388369303e-06,
+ "loss": 0.396,
+ "step": 206400
+ },
+ {
+ "epoch": 1.0651580582083469,
+ "grad_norm": 25035.076171875,
+ "learning_rate": 1.8257301874893607e-06,
+ "loss": 0.4,
+ "step": 206450
+ },
+ {
+ "epoch": 1.0654160281909597,
+ "grad_norm": 23690.525390625,
+ "learning_rate": 1.8159617011018205e-06,
+ "loss": 0.3982,
+ "step": 206500
+ },
+ {
+ "epoch": 1.0656739981735726,
+ "grad_norm": 25419.333984375,
+ "learning_rate": 1.8062189344208835e-06,
+ "loss": 0.401,
+ "step": 206550
+ },
+ {
+ "epoch": 1.0659319681561854,
+ "grad_norm": 24045.5703125,
+ "learning_rate": 1.7965018926470622e-06,
+ "loss": 0.3974,
+ "step": 206600
+ },
+ {
+ "epoch": 1.0661899381387983,
+ "grad_norm": 23030.5625,
+ "learning_rate": 1.7868105809671298e-06,
+ "loss": 0.4049,
+ "step": 206650
+ },
+ {
+ "epoch": 1.066447908121411,
+ "grad_norm": 26036.546875,
+ "learning_rate": 1.7771450045541149e-06,
+ "loss": 0.3948,
+ "step": 206700
+ },
+ {
+ "epoch": 1.0667058781040237,
+ "grad_norm": 25025.001953125,
+ "learning_rate": 1.7675051685673127e-06,
+ "loss": 0.404,
+ "step": 206750
+ },
+ {
+ "epoch": 1.0669638480866366,
+ "grad_norm": 24296.775390625,
+ "learning_rate": 1.757891078152285e-06,
+ "loss": 0.3953,
+ "step": 206800
+ },
+ {
+ "epoch": 1.0672218180692494,
+ "grad_norm": 22506.177734375,
+ "learning_rate": 1.748302738440838e-06,
+ "loss": 0.4,
+ "step": 206850
+ },
+ {
+ "epoch": 1.0674797880518623,
+ "grad_norm": 26741.724609375,
+ "learning_rate": 1.738740154551055e-06,
+ "loss": 0.3971,
+ "step": 206900
+ },
+ {
+ "epoch": 1.0677377580344751,
+ "grad_norm": 24435.2421875,
+ "learning_rate": 1.7292033315872592e-06,
+ "loss": 0.3955,
+ "step": 206950
+ },
+ {
+ "epoch": 1.067995728017088,
+ "grad_norm": 21010.521484375,
+ "learning_rate": 1.7196922746400058e-06,
+ "loss": 0.3997,
+ "step": 207000
+ },
+ {
+ "epoch": 1.0682536979997008,
+ "grad_norm": 21215.384765625,
+ "learning_rate": 1.710206988786134e-06,
+ "loss": 0.3971,
+ "step": 207050
+ },
+ {
+ "epoch": 1.0685116679823137,
+ "grad_norm": 23713.73046875,
+ "learning_rate": 1.7007474790886823e-06,
+ "loss": 0.4022,
+ "step": 207100
+ },
+ {
+ "epoch": 1.0687696379649263,
+ "grad_norm": 26804.658203125,
+ "learning_rate": 1.691313750596979e-06,
+ "loss": 0.4028,
+ "step": 207150
+ },
+ {
+ "epoch": 1.0690276079475391,
+ "grad_norm": 24873.318359375,
+ "learning_rate": 1.68190580834654e-06,
+ "loss": 0.3995,
+ "step": 207200
+ },
+ {
+ "epoch": 1.069285577930152,
+ "grad_norm": 23567.91796875,
+ "learning_rate": 1.6725236573591596e-06,
+ "loss": 0.3988,
+ "step": 207250
+ },
+ {
+ "epoch": 1.0695435479127648,
+ "grad_norm": 23659.513671875,
+ "learning_rate": 1.6631673026428484e-06,
+ "loss": 0.4036,
+ "step": 207300
+ },
+ {
+ "epoch": 1.0698015178953777,
+ "grad_norm": 22014.623046875,
+ "learning_rate": 1.6538367491918339e-06,
+ "loss": 0.4003,
+ "step": 207350
+ },
+ {
+ "epoch": 1.0700594878779905,
+ "grad_norm": 26985.240234375,
+ "learning_rate": 1.6445320019865984e-06,
+ "loss": 0.3949,
+ "step": 207400
+ },
+ {
+ "epoch": 1.0703174578606034,
+ "grad_norm": 25032.328125,
+ "learning_rate": 1.635253065993836e-06,
+ "loss": 0.4072,
+ "step": 207450
+ },
+ {
+ "epoch": 1.0705754278432162,
+ "grad_norm": 23999.62890625,
+ "learning_rate": 1.6259999461664566e-06,
+ "loss": 0.4018,
+ "step": 207500
+ },
+ {
+ "epoch": 1.070833397825829,
+ "grad_norm": 24842.439453125,
+ "learning_rate": 1.616772647443593e-06,
+ "loss": 0.3992,
+ "step": 207550
+ },
+ {
+ "epoch": 1.0710913678084417,
+ "grad_norm": 26740.083984375,
+ "learning_rate": 1.6075711747506106e-06,
+ "loss": 0.3954,
+ "step": 207600
+ },
+ {
+ "epoch": 1.0713493377910546,
+ "grad_norm": 25067.95703125,
+ "learning_rate": 1.598395532999064e-06,
+ "loss": 0.4008,
+ "step": 207650
+ },
+ {
+ "epoch": 1.0716073077736674,
+ "grad_norm": 22218.814453125,
+ "learning_rate": 1.5892457270867467e-06,
+ "loss": 0.4005,
+ "step": 207700
+ },
+ {
+ "epoch": 1.0718652777562803,
+ "grad_norm": 25727.36328125,
+ "learning_rate": 1.5801217618976294e-06,
+ "loss": 0.402,
+ "step": 207750
+ },
+ {
+ "epoch": 1.0721232477388931,
+ "grad_norm": 24692.19921875,
+ "learning_rate": 1.5710236423019275e-06,
+ "loss": 0.4035,
+ "step": 207800
+ },
+ {
+ "epoch": 1.072381217721506,
+ "grad_norm": 25514.009765625,
+ "learning_rate": 1.5619513731560342e-06,
+ "loss": 0.3964,
+ "step": 207850
+ },
+ {
+ "epoch": 1.0726391877041188,
+ "grad_norm": 24503.408203125,
+ "learning_rate": 1.5529049593025425e-06,
+ "loss": 0.4036,
+ "step": 207900
+ },
+ {
+ "epoch": 1.0728971576867317,
+ "grad_norm": 27466.498046875,
+ "learning_rate": 1.5438844055702728e-06,
+ "loss": 0.4019,
+ "step": 207950
+ },
+ {
+ "epoch": 1.0731551276693443,
+ "grad_norm": 24170.1171875,
+ "learning_rate": 1.5348897167742015e-06,
+ "loss": 0.4005,
+ "step": 208000
+ },
+ {
+ "epoch": 1.0734130976519571,
+ "grad_norm": 24094.044921875,
+ "learning_rate": 1.525920897715527e-06,
+ "loss": 0.402,
+ "step": 208050
+ },
+ {
+ "epoch": 1.07367106763457,
+ "grad_norm": 22958.8125,
+ "learning_rate": 1.5169779531816365e-06,
+ "loss": 0.4041,
+ "step": 208100
+ },
+ {
+ "epoch": 1.0739290376171828,
+ "grad_norm": 24056.849609375,
+ "learning_rate": 1.508060887946089e-06,
+ "loss": 0.4017,
+ "step": 208150
+ },
+ {
+ "epoch": 1.0741870075997957,
+ "grad_norm": 22397.435546875,
+ "learning_rate": 1.499169706768655e-06,
+ "loss": 0.4036,
+ "step": 208200
+ },
+ {
+ "epoch": 1.0744449775824085,
+ "grad_norm": 22317.74609375,
+ "learning_rate": 1.4903044143952604e-06,
+ "loss": 0.3992,
+ "step": 208250
+ },
+ {
+ "epoch": 1.0747029475650214,
+ "grad_norm": 22923.57421875,
+ "learning_rate": 1.4814650155580367e-06,
+ "loss": 0.3953,
+ "step": 208300
+ },
+ {
+ "epoch": 1.0749609175476342,
+ "grad_norm": 24276.650390625,
+ "learning_rate": 1.4726515149752818e-06,
+ "loss": 0.4052,
+ "step": 208350
+ },
+ {
+ "epoch": 1.075218887530247,
+ "grad_norm": 27791.369140625,
+ "learning_rate": 1.4638639173514712e-06,
+ "loss": 0.4027,
+ "step": 208400
+ },
+ {
+ "epoch": 1.0754768575128597,
+ "grad_norm": 22683.73046875,
+ "learning_rate": 1.4551022273772585e-06,
+ "loss": 0.4036,
+ "step": 208450
+ },
+ {
+ "epoch": 1.0757348274954726,
+ "grad_norm": 26474.087890625,
+ "learning_rate": 1.4463664497294527e-06,
+ "loss": 0.3966,
+ "step": 208500
+ },
+ {
+ "epoch": 1.0759927974780854,
+ "grad_norm": 25933.25390625,
+ "learning_rate": 1.4376565890710514e-06,
+ "loss": 0.4042,
+ "step": 208550
+ },
+ {
+ "epoch": 1.0762507674606983,
+ "grad_norm": 23373.078125,
+ "learning_rate": 1.4289726500512134e-06,
+ "loss": 0.3971,
+ "step": 208600
+ },
+ {
+ "epoch": 1.076508737443311,
+ "grad_norm": 23282.916015625,
+ "learning_rate": 1.4203146373052423e-06,
+ "loss": 0.4038,
+ "step": 208650
+ },
+ {
+ "epoch": 1.076766707425924,
+ "grad_norm": 26307.12109375,
+ "learning_rate": 1.4116825554546353e-06,
+ "loss": 0.3959,
+ "step": 208700
+ },
+ {
+ "epoch": 1.0770246774085368,
+ "grad_norm": 24472.884765625,
+ "learning_rate": 1.4030764091070237e-06,
+ "loss": 0.3954,
+ "step": 208750
+ },
+ {
+ "epoch": 1.0772826473911497,
+ "grad_norm": 25234.9375,
+ "learning_rate": 1.394496202856188e-06,
+ "loss": 0.4003,
+ "step": 208800
+ },
+ {
+ "epoch": 1.0775406173737623,
+ "grad_norm": 31742.607421875,
+ "learning_rate": 1.385941941282104e-06,
+ "loss": 0.3963,
+ "step": 208850
+ },
+ {
+ "epoch": 1.0777985873563751,
+ "grad_norm": 22577.599609375,
+ "learning_rate": 1.3774136289508466e-06,
+ "loss": 0.4003,
+ "step": 208900
+ },
+ {
+ "epoch": 1.078056557338988,
+ "grad_norm": 24765.111328125,
+ "learning_rate": 1.3689112704146745e-06,
+ "loss": 0.3965,
+ "step": 208950
+ },
+ {
+ "epoch": 1.0783145273216008,
+ "grad_norm": 24935.205078125,
+ "learning_rate": 1.3604348702119795e-06,
+ "loss": 0.4001,
+ "step": 209000
+ },
+ {
+ "epoch": 1.0785724973042137,
+ "grad_norm": 25825.361328125,
+ "learning_rate": 1.3519844328673037e-06,
+ "loss": 0.4014,
+ "step": 209050
+ },
+ {
+ "epoch": 1.0788304672868265,
+ "grad_norm": 23713.068359375,
+ "learning_rate": 1.343559962891322e-06,
+ "loss": 0.3961,
+ "step": 209100
+ },
+ {
+ "epoch": 1.0790884372694394,
+ "grad_norm": 24578.435546875,
+ "learning_rate": 1.3351614647808542e-06,
+ "loss": 0.3983,
+ "step": 209150
+ },
+ {
+ "epoch": 1.0793464072520522,
+ "grad_norm": 22323.19140625,
+ "learning_rate": 1.3267889430188585e-06,
+ "loss": 0.4005,
+ "step": 209200
+ },
+ {
+ "epoch": 1.079604377234665,
+ "grad_norm": 22834.76953125,
+ "learning_rate": 1.3184424020744212e-06,
+ "loss": 0.3964,
+ "step": 209250
+ },
+ {
+ "epoch": 1.0798623472172777,
+ "grad_norm": 22097.615234375,
+ "learning_rate": 1.3101218464027676e-06,
+ "loss": 0.3932,
+ "step": 209300
+ },
+ {
+ "epoch": 1.0801203171998905,
+ "grad_norm": 23564.677734375,
+ "learning_rate": 1.3018272804452503e-06,
+ "loss": 0.3996,
+ "step": 209350
+ },
+ {
+ "epoch": 1.0803782871825034,
+ "grad_norm": 25264.150390625,
+ "learning_rate": 1.2935587086293443e-06,
+ "loss": 0.3975,
+ "step": 209400
+ },
+ {
+ "epoch": 1.0806362571651162,
+ "grad_norm": 22622.1015625,
+ "learning_rate": 1.2853161353686526e-06,
+ "loss": 0.4028,
+ "step": 209450
+ },
+ {
+ "epoch": 1.080894227147729,
+ "grad_norm": 25768.478515625,
+ "learning_rate": 1.2770995650629058e-06,
+ "loss": 0.395,
+ "step": 209500
+ },
+ {
+ "epoch": 1.081152197130342,
+ "grad_norm": 21997.23828125,
+ "learning_rate": 1.2689090020979455e-06,
+ "loss": 0.3997,
+ "step": 209550
+ },
+ {
+ "epoch": 1.0814101671129548,
+ "grad_norm": 24653.796875,
+ "learning_rate": 1.26074445084573e-06,
+ "loss": 0.3992,
+ "step": 209600
+ },
+ {
+ "epoch": 1.0816681370955676,
+ "grad_norm": 25631.18359375,
+ "learning_rate": 1.252605915664362e-06,
+ "loss": 0.4006,
+ "step": 209650
+ },
+ {
+ "epoch": 1.0819261070781803,
+ "grad_norm": 25373.162109375,
+ "learning_rate": 1.2444934008980058e-06,
+ "loss": 0.4047,
+ "step": 209700
+ },
+ {
+ "epoch": 1.0821840770607931,
+ "grad_norm": 23108.03125,
+ "learning_rate": 1.2364069108769804e-06,
+ "loss": 0.3994,
+ "step": 209750
+ },
+ {
+ "epoch": 1.082442047043406,
+ "grad_norm": 23362.0546875,
+ "learning_rate": 1.2283464499177e-06,
+ "loss": 0.4059,
+ "step": 209800
+ },
+ {
+ "epoch": 1.0827000170260188,
+ "grad_norm": 19350.4609375,
+ "learning_rate": 1.2203120223226727e-06,
+ "loss": 0.4012,
+ "step": 209850
+ },
+ {
+ "epoch": 1.0829579870086317,
+ "grad_norm": 24877.921875,
+ "learning_rate": 1.2123036323805237e-06,
+ "loss": 0.3989,
+ "step": 209900
+ },
+ {
+ "epoch": 1.0832159569912445,
+ "grad_norm": 25544.15625,
+ "learning_rate": 1.2043212843659724e-06,
+ "loss": 0.3963,
+ "step": 209950
+ },
+ {
+ "epoch": 1.0834739269738574,
+ "grad_norm": 24271.380859375,
+ "learning_rate": 1.1963649825398494e-06,
+ "loss": 0.4026,
+ "step": 210000
+ },
+ {
+ "epoch": 1.0834739269738574,
+ "eval_loss": 0.3863469064235687,
+ "eval_runtime": 3245.2698,
+ "eval_samples_per_second": 955.582,
+ "eval_steps_per_second": 1.866,
+ "step": 210000
+ },
+ {
+ "epoch": 1.0837318969564702,
+ "grad_norm": 23787.166015625,
+ "learning_rate": 1.188434731149074e-06,
+ "loss": 0.3965,
+ "step": 210050
+ },
+ {
+ "epoch": 1.083989866939083,
+ "grad_norm": 23804.666015625,
+ "learning_rate": 1.1805305344266604e-06,
+ "loss": 0.402,
+ "step": 210100
+ },
+ {
+ "epoch": 1.084247836921696,
+ "grad_norm": 25432.59765625,
+ "learning_rate": 1.1726523965917113e-06,
+ "loss": 0.4004,
+ "step": 210150
+ },
+ {
+ "epoch": 1.0845058069043085,
+ "grad_norm": 23947.248046875,
+ "learning_rate": 1.1648003218494242e-06,
+ "loss": 0.404,
+ "step": 210200
+ },
+ {
+ "epoch": 1.0847637768869214,
+ "grad_norm": 25563.51953125,
+ "learning_rate": 1.156974314391085e-06,
+ "loss": 0.3985,
+ "step": 210250
+ },
+ {
+ "epoch": 1.0850217468695342,
+ "grad_norm": 36885.1484375,
+ "learning_rate": 1.1491743783940801e-06,
+ "loss": 0.3974,
+ "step": 210300
+ },
+ {
+ "epoch": 1.085279716852147,
+ "grad_norm": 25414.0859375,
+ "learning_rate": 1.1414005180218346e-06,
+ "loss": 0.3988,
+ "step": 210350
+ },
+ {
+ "epoch": 1.08553768683476,
+ "grad_norm": 24175.00390625,
+ "learning_rate": 1.1336527374239125e-06,
+ "loss": 0.3995,
+ "step": 210400
+ },
+ {
+ "epoch": 1.0857956568173728,
+ "grad_norm": 21867.005859375,
+ "learning_rate": 1.1259310407359114e-06,
+ "loss": 0.3963,
+ "step": 210450
+ },
+ {
+ "epoch": 1.0860536267999856,
+ "grad_norm": 22963.5078125,
+ "learning_rate": 1.1182354320795285e-06,
+ "loss": 0.3961,
+ "step": 210500
+ },
+ {
+ "epoch": 1.0863115967825985,
+ "grad_norm": 24181.43359375,
+ "learning_rate": 1.1105659155625393e-06,
+ "loss": 0.3936,
+ "step": 210550
+ },
+ {
+ "epoch": 1.086569566765211,
+ "grad_norm": 26222.150390625,
+ "learning_rate": 1.1029224952787687e-06,
+ "loss": 0.402,
+ "step": 210600
+ },
+ {
+ "epoch": 1.086827536747824,
+ "grad_norm": 23023.005859375,
+ "learning_rate": 1.0953051753081368e-06,
+ "loss": 0.3887,
+ "step": 210650
+ },
+ {
+ "epoch": 1.0870855067304368,
+ "grad_norm": 23715.748046875,
+ "learning_rate": 1.0877139597166186e-06,
+ "loss": 0.3984,
+ "step": 210700
+ },
+ {
+ "epoch": 1.0873434767130497,
+ "grad_norm": 25058.646484375,
+ "learning_rate": 1.0801488525562565e-06,
+ "loss": 0.395,
+ "step": 210750
+ },
+ {
+ "epoch": 1.0876014466956625,
+ "grad_norm": 23697.73046875,
+ "learning_rate": 1.0726098578651588e-06,
+ "loss": 0.3993,
+ "step": 210800
+ },
+ {
+ "epoch": 1.0878594166782753,
+ "grad_norm": 23380.267578125,
+ "learning_rate": 1.065096979667496e-06,
+ "loss": 0.393,
+ "step": 210850
+ },
+ {
+ "epoch": 1.0881173866608882,
+ "grad_norm": 27738.705078125,
+ "learning_rate": 1.0576102219734985e-06,
+ "loss": 0.393,
+ "step": 210900
+ },
+ {
+ "epoch": 1.088375356643501,
+ "grad_norm": 26636.419921875,
+ "learning_rate": 1.0501495887794478e-06,
+ "loss": 0.3969,
+ "step": 210950
+ },
+ {
+ "epoch": 1.088633326626114,
+ "grad_norm": 23831.09765625,
+ "learning_rate": 1.0427150840676913e-06,
+ "loss": 0.3962,
+ "step": 211000
+ },
+ {
+ "epoch": 1.0888912966087265,
+ "grad_norm": 25435.53515625,
+ "learning_rate": 1.0353067118066163e-06,
+ "loss": 0.4019,
+ "step": 211050
+ },
+ {
+ "epoch": 1.0891492665913394,
+ "grad_norm": 25170.9296875,
+ "learning_rate": 1.0279244759506756e-06,
+ "loss": 0.4062,
+ "step": 211100
+ },
+ {
+ "epoch": 1.0894072365739522,
+ "grad_norm": 22411.763671875,
+ "learning_rate": 1.0205683804403564e-06,
+ "loss": 0.4056,
+ "step": 211150
+ },
+ {
+ "epoch": 1.089665206556565,
+ "grad_norm": 20984.373046875,
+ "learning_rate": 1.0132384292022124e-06,
+ "loss": 0.3999,
+ "step": 211200
+ },
+ {
+ "epoch": 1.089923176539178,
+ "grad_norm": 23698.67578125,
+ "learning_rate": 1.0059346261488133e-06,
+ "loss": 0.3983,
+ "step": 211250
+ },
+ {
+ "epoch": 1.0901811465217908,
+ "grad_norm": 25426.779296875,
+ "learning_rate": 9.986569751788023e-07,
+ "loss": 0.3923,
+ "step": 211300
+ },
+ {
+ "epoch": 1.0904391165044036,
+ "grad_norm": 26947.849609375,
+ "learning_rate": 9.914054801768435e-07,
+ "loss": 0.3961,
+ "step": 211350
+ },
+ {
+ "epoch": 1.0906970864870165,
+ "grad_norm": 25117.458984375,
+ "learning_rate": 9.841801450136357e-07,
+ "loss": 0.389,
+ "step": 211400
+ },
+ {
+ "epoch": 1.090955056469629,
+ "grad_norm": 23383.60546875,
+ "learning_rate": 9.76980973545938e-07,
+ "loss": 0.4045,
+ "step": 211450
+ },
+ {
+ "epoch": 1.091213026452242,
+ "grad_norm": 26197.37109375,
+ "learning_rate": 9.698079696165153e-07,
+ "loss": 0.3956,
+ "step": 211500
+ },
+ {
+ "epoch": 1.0914709964348548,
+ "grad_norm": 24827.63671875,
+ "learning_rate": 9.626611370541882e-07,
+ "loss": 0.3959,
+ "step": 211550
+ },
+ {
+ "epoch": 1.0917289664174676,
+ "grad_norm": 23645.685546875,
+ "learning_rate": 9.555404796737944e-07,
+ "loss": 0.3967,
+ "step": 211600
+ },
+ {
+ "epoch": 1.0919869364000805,
+ "grad_norm": 23297.22265625,
+ "learning_rate": 9.484460012761986e-07,
+ "loss": 0.3979,
+ "step": 211650
+ },
+ {
+ "epoch": 1.0922449063826933,
+ "grad_norm": 25558.216796875,
+ "learning_rate": 9.413777056482998e-07,
+ "loss": 0.4049,
+ "step": 211700
+ },
+ {
+ "epoch": 1.0925028763653062,
+ "grad_norm": 24427.572265625,
+ "learning_rate": 9.343355965630185e-07,
+ "loss": 0.4005,
+ "step": 211750
+ },
+ {
+ "epoch": 1.092760846347919,
+ "grad_norm": 21614.783203125,
+ "learning_rate": 9.273196777792926e-07,
+ "loss": 0.4008,
+ "step": 211800
+ },
+ {
+ "epoch": 1.0930188163305319,
+ "grad_norm": 24935.197265625,
+ "learning_rate": 9.203299530420873e-07,
+ "loss": 0.3974,
+ "step": 211850
+ },
+ {
+ "epoch": 1.0932767863131445,
+ "grad_norm": 25032.623046875,
+ "learning_rate": 9.133664260823848e-07,
+ "loss": 0.3974,
+ "step": 211900
+ },
+ {
+ "epoch": 1.0935347562957574,
+ "grad_norm": 25528.576171875,
+ "learning_rate": 9.064291006171732e-07,
+ "loss": 0.3969,
+ "step": 211950
+ },
+ {
+ "epoch": 1.0937927262783702,
+ "grad_norm": 25501.603515625,
+ "learning_rate": 8.99517980349468e-07,
+ "loss": 0.3995,
+ "step": 212000
+ },
+ {
+ "epoch": 1.094050696260983,
+ "grad_norm": 23834.970703125,
+ "learning_rate": 8.926330689682849e-07,
+ "loss": 0.4029,
+ "step": 212050
+ },
+ {
+ "epoch": 1.094308666243596,
+ "grad_norm": 25106.78125,
+ "learning_rate": 8.857743701486676e-07,
+ "loss": 0.3977,
+ "step": 212100
+ },
+ {
+ "epoch": 1.0945666362262088,
+ "grad_norm": 20878.955078125,
+ "learning_rate": 8.789418875516431e-07,
+ "loss": 0.3995,
+ "step": 212150
+ },
+ {
+ "epoch": 1.0948246062088216,
+ "grad_norm": 20234.91796875,
+ "learning_rate": 8.721356248242662e-07,
+ "loss": 0.3987,
+ "step": 212200
+ },
+ {
+ "epoch": 1.0950825761914345,
+ "grad_norm": 25996.611328125,
+ "learning_rate": 8.653555855995921e-07,
+ "loss": 0.3962,
+ "step": 212250
+ },
+ {
+ "epoch": 1.095340546174047,
+ "grad_norm": 26965.455078125,
+ "learning_rate": 8.586017734966644e-07,
+ "loss": 0.4022,
+ "step": 212300
+ },
+ {
+ "epoch": 1.09559851615666,
+ "grad_norm": 25616.74609375,
+ "learning_rate": 8.518741921205498e-07,
+ "loss": 0.3956,
+ "step": 212350
+ },
+ {
+ "epoch": 1.0958564861392728,
+ "grad_norm": 23590.908203125,
+ "learning_rate": 8.451728450622864e-07,
+ "loss": 0.3989,
+ "step": 212400
+ },
+ {
+ "epoch": 1.0961144561218856,
+ "grad_norm": 24570.7109375,
+ "learning_rate": 8.384977358989355e-07,
+ "loss": 0.3975,
+ "step": 212450
+ },
+ {
+ "epoch": 1.0963724261044985,
+ "grad_norm": 24186.62890625,
+ "learning_rate": 8.318488681935354e-07,
+ "loss": 0.4027,
+ "step": 212500
+ },
+ {
+ "epoch": 1.0966303960871113,
+ "grad_norm": 26281.365234375,
+ "learning_rate": 8.252262454951198e-07,
+ "loss": 0.398,
+ "step": 212550
+ },
+ {
+ "epoch": 1.0968883660697242,
+ "grad_norm": 25870.1484375,
+ "learning_rate": 8.186298713387219e-07,
+ "loss": 0.4032,
+ "step": 212600
+ },
+ {
+ "epoch": 1.097146336052337,
+ "grad_norm": 25573.958984375,
+ "learning_rate": 8.120597492453586e-07,
+ "loss": 0.3986,
+ "step": 212650
+ },
+ {
+ "epoch": 1.0974043060349499,
+ "grad_norm": 24439.345703125,
+ "learning_rate": 8.055158827220355e-07,
+ "loss": 0.4053,
+ "step": 212700
+ },
+ {
+ "epoch": 1.0976622760175625,
+ "grad_norm": 26741.91015625,
+ "learning_rate": 7.989982752617364e-07,
+ "loss": 0.4,
+ "step": 212750
+ },
+ {
+ "epoch": 1.0979202460001753,
+ "grad_norm": 24200.69921875,
+ "learning_rate": 7.92506930343434e-07,
+ "loss": 0.4003,
+ "step": 212800
+ },
+ {
+ "epoch": 1.0981782159827882,
+ "grad_norm": 22554.67578125,
+ "learning_rate": 7.860418514320844e-07,
+ "loss": 0.4023,
+ "step": 212850
+ },
+ {
+ "epoch": 1.098436185965401,
+ "grad_norm": 24178.01171875,
+ "learning_rate": 7.796030419786271e-07,
+ "loss": 0.3966,
+ "step": 212900
+ },
+ {
+ "epoch": 1.098694155948014,
+ "grad_norm": 23407.0859375,
+ "learning_rate": 7.731905054199629e-07,
+ "loss": 0.3986,
+ "step": 212950
+ },
+ {
+ "epoch": 1.0989521259306267,
+ "grad_norm": 23653.775390625,
+ "learning_rate": 7.66804245178987e-07,
+ "loss": 0.4052,
+ "step": 213000
+ },
+ {
+ "epoch": 1.0992100959132396,
+ "grad_norm": 24526.658203125,
+ "learning_rate": 7.604442646645615e-07,
+ "loss": 0.4013,
+ "step": 213050
+ },
+ {
+ "epoch": 1.0994680658958524,
+ "grad_norm": 22805.16015625,
+ "learning_rate": 7.541105672715154e-07,
+ "loss": 0.4032,
+ "step": 213100
+ },
+ {
+ "epoch": 1.099726035878465,
+ "grad_norm": 25872.771484375,
+ "learning_rate": 7.478031563806664e-07,
+ "loss": 0.3991,
+ "step": 213150
+ },
+ {
+ "epoch": 1.099984005861078,
+ "grad_norm": 26245.833984375,
+ "learning_rate": 7.415220353587715e-07,
+ "loss": 0.4038,
+ "step": 213200
+ },
+ {
+ "epoch": 1.1002419758436908,
+ "grad_norm": 23856.12109375,
+ "learning_rate": 7.35267207558582e-07,
+ "loss": 0.4061,
+ "step": 213250
+ },
+ {
+ "epoch": 1.1004999458263036,
+ "grad_norm": 22112.50390625,
+ "learning_rate": 7.290386763187995e-07,
+ "loss": 0.3976,
+ "step": 213300
+ },
+ {
+ "epoch": 1.1007579158089165,
+ "grad_norm": 23340.501953125,
+ "learning_rate": 7.228364449640978e-07,
+ "loss": 0.4038,
+ "step": 213350
+ },
+ {
+ "epoch": 1.1010158857915293,
+ "grad_norm": 23487.26171875,
+ "learning_rate": 7.166605168051066e-07,
+ "loss": 0.4002,
+ "step": 213400
+ },
+ {
+ "epoch": 1.1012738557741422,
+ "grad_norm": 25290.654296875,
+ "learning_rate": 7.105108951384109e-07,
+ "loss": 0.3952,
+ "step": 213450
+ },
+ {
+ "epoch": 1.101531825756755,
+ "grad_norm": 21821.9921875,
+ "learning_rate": 7.04387583246563e-07,
+ "loss": 0.4004,
+ "step": 213500
+ },
+ {
+ "epoch": 1.1017897957393679,
+ "grad_norm": 25063.50390625,
+ "learning_rate": 6.982905843980758e-07,
+ "loss": 0.4081,
+ "step": 213550
+ },
+ {
+ "epoch": 1.1020477657219805,
+ "grad_norm": 25687.083984375,
+ "learning_rate": 6.922199018474018e-07,
+ "loss": 0.3988,
+ "step": 213600
+ },
+ {
+ "epoch": 1.1023057357045933,
+ "grad_norm": 26373.240234375,
+ "learning_rate": 6.861755388349489e-07,
+ "loss": 0.3971,
+ "step": 213650
+ },
+ {
+ "epoch": 1.1025637056872062,
+ "grad_norm": 23180.66015625,
+ "learning_rate": 6.801574985870917e-07,
+ "loss": 0.394,
+ "step": 213700
+ },
+ {
+ "epoch": 1.102821675669819,
+ "grad_norm": 25684.03515625,
+ "learning_rate": 6.741657843161387e-07,
+ "loss": 0.3972,
+ "step": 213750
+ },
+ {
+ "epoch": 1.1030796456524319,
+ "grad_norm": 24192.1328125,
+ "learning_rate": 6.682003992203534e-07,
+ "loss": 0.3941,
+ "step": 213800
+ },
+ {
+ "epoch": 1.1033376156350447,
+ "grad_norm": 23294.033203125,
+ "learning_rate": 6.622613464839334e-07,
+ "loss": 0.3984,
+ "step": 213850
+ },
+ {
+ "epoch": 1.1035955856176576,
+ "grad_norm": 24957.40234375,
+ "learning_rate": 6.563486292770426e-07,
+ "loss": 0.3983,
+ "step": 213900
+ },
+ {
+ "epoch": 1.1038535556002704,
+ "grad_norm": 22199.44921875,
+ "learning_rate": 6.504622507557678e-07,
+ "loss": 0.3998,
+ "step": 213950
+ },
+ {
+ "epoch": 1.1041115255828833,
+ "grad_norm": 22867.84375,
+ "learning_rate": 6.4460221406214e-07,
+ "loss": 0.4024,
+ "step": 214000
+ },
+ {
+ "epoch": 1.104369495565496,
+ "grad_norm": 23964.05078125,
+ "learning_rate": 6.387685223241513e-07,
+ "loss": 0.3981,
+ "step": 214050
+ },
+ {
+ "epoch": 1.1046274655481088,
+ "grad_norm": 21671.08984375,
+ "learning_rate": 6.329611786556888e-07,
+ "loss": 0.3927,
+ "step": 214100
+ },
+ {
+ "epoch": 1.1048854355307216,
+ "grad_norm": 23079.9453125,
+ "learning_rate": 6.271801861566229e-07,
+ "loss": 0.3966,
+ "step": 214150
+ },
+ {
+ "epoch": 1.1051434055133345,
+ "grad_norm": 25017.455078125,
+ "learning_rate": 6.214255479127185e-07,
+ "loss": 0.3958,
+ "step": 214200
+ },
+ {
+ "epoch": 1.1054013754959473,
+ "grad_norm": 24560.5,
+ "learning_rate": 6.156972669956961e-07,
+ "loss": 0.3952,
+ "step": 214250
+ },
+ {
+ "epoch": 1.1056593454785602,
+ "grad_norm": 23092.80859375,
+ "learning_rate": 6.099953464632047e-07,
+ "loss": 0.3996,
+ "step": 214300
+ },
+ {
+ "epoch": 1.105917315461173,
+ "grad_norm": 24638.765625,
+ "learning_rate": 6.043197893588149e-07,
+ "loss": 0.396,
+ "step": 214350
+ },
+ {
+ "epoch": 1.1061752854437858,
+ "grad_norm": 24716.921875,
+ "learning_rate": 5.986705987120256e-07,
+ "loss": 0.3991,
+ "step": 214400
+ },
+ {
+ "epoch": 1.1064332554263987,
+ "grad_norm": 25669.224609375,
+ "learning_rate": 5.930477775382748e-07,
+ "loss": 0.4001,
+ "step": 214450
+ },
+ {
+ "epoch": 1.1066912254090113,
+ "grad_norm": 22935.7265625,
+ "learning_rate": 5.874513288389116e-07,
+ "loss": 0.3984,
+ "step": 214500
+ },
+ {
+ "epoch": 1.1069491953916242,
+ "grad_norm": 21897.39453125,
+ "learning_rate": 5.818812556012076e-07,
+ "loss": 0.4009,
+ "step": 214550
+ },
+ {
+ "epoch": 1.107207165374237,
+ "grad_norm": 23161.447265625,
+ "learning_rate": 5.763375607983679e-07,
+ "loss": 0.3986,
+ "step": 214600
+ },
+ {
+ "epoch": 1.1074651353568499,
+ "grad_norm": 21095.642578125,
+ "learning_rate": 5.708202473894975e-07,
+ "loss": 0.4029,
+ "step": 214650
+ },
+ {
+ "epoch": 1.1077231053394627,
+ "grad_norm": 22999.712890625,
+ "learning_rate": 5.653293183196462e-07,
+ "loss": 0.3958,
+ "step": 214700
+ },
+ {
+ "epoch": 1.1079810753220756,
+ "grad_norm": 23144.787109375,
+ "learning_rate": 5.598647765197529e-07,
+ "loss": 0.3959,
+ "step": 214750
+ },
+ {
+ "epoch": 1.1082390453046884,
+ "grad_norm": 22194.73046875,
+ "learning_rate": 5.544266249066899e-07,
+ "loss": 0.3974,
+ "step": 214800
+ },
+ {
+ "epoch": 1.1084970152873013,
+ "grad_norm": 21749.3046875,
+ "learning_rate": 5.490148663832406e-07,
+ "loss": 0.3991,
+ "step": 214850
+ },
+ {
+ "epoch": 1.108754985269914,
+ "grad_norm": 22517.267578125,
+ "learning_rate": 5.43629503838089e-07,
+ "loss": 0.4011,
+ "step": 214900
+ },
+ {
+ "epoch": 1.1090129552525267,
+ "grad_norm": 26576.787109375,
+ "learning_rate": 5.382705401458465e-07,
+ "loss": 0.3984,
+ "step": 214950
+ },
+ {
+ "epoch": 1.1092709252351396,
+ "grad_norm": 22315.609375,
+ "learning_rate": 5.329379781670196e-07,
+ "loss": 0.4007,
+ "step": 215000
+ },
+ {
+ "epoch": 1.1092709252351396,
+ "eval_loss": 0.3860665559768677,
+ "eval_runtime": 3233.1388,
+ "eval_samples_per_second": 959.167,
+ "eval_steps_per_second": 1.873,
+ "step": 215000
+ }
+ ],
+ "logging_steps": 50,
+ "max_steps": 225000,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 2,
+ "save_steps": 5000,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 5,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 0
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.916937061638367e+17,
+ "train_batch_size": 128,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/training_args.bin b/pretrain_glome_nano_model_tiny/checkpoint-215000/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..afd49ab13e1adc210b7ee9755ab768f1bc6434dc
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c41aa9c6023a3a9650c2ca731b440abde601b316b41906bb1dab8748c3c13ed
+size 5304
diff --git a/pretrain_glome_nano_model_tiny/checkpoint-215000/vocab.json b/pretrain_glome_nano_model_tiny/checkpoint-215000/vocab.json
new file mode 100644
index 0000000000000000000000000000000000000000..54045330cccae0d703647b73183868a84aa6c91f
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/checkpoint-215000/vocab.json
@@ -0,0 +1 @@
+{"A":0,"R":1,"N":2,"D":3,"C":4,"Q":5,"E":6,"G":7,"H":8,"I":9,"L":10,"K":11,"M":12,"F":13,"P":14,"S":15,"T":16,"W":17,"Y":18,"V":19,"X":20,"B":21,"U":22,"Z":23,"O":24,".":25,"-":26,"":27,"":28,"":29,"":30,"":31}
\ No newline at end of file
diff --git a/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940241.amax.612137.0 b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940241.amax.612137.0
new file mode 100644
index 0000000000000000000000000000000000000000..179e01ca216169225277f4a05fde5c4100b6e1db
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940241.amax.612137.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa4b1f7df48a807b1536a66293f8d3c69d22c2f41844975ef08715dd8b0d776
+size 6779
diff --git a/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940354.amax.615566.0 b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940354.amax.615566.0
new file mode 100644
index 0000000000000000000000000000000000000000..8603703ac942f839bf7a3cd5617a5bd4fb3c4d96
--- /dev/null
+++ b/pretrain_glome_nano_model_tiny/logs/events.out.tfevents.1765940354.amax.615566.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85dabbf1e456e1f20a84dc748d61ed39952becc5b14189935f10ff23d484249b
+size 984259