diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..b2d83e30ec4844c9ed86a4aee244540440deb515 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ready_to_train.csv filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250504_172503-0ictlmwf/run-0ictlmwf.wandb filter=lfs diff=lfs merge=lfs -text
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..ebf40fb15fa27dea5d9d92a580d1bca519b3f4f3
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,102 @@
+{
+  "<extra_id_0>": 127,
+  "<extra_id_10>": 117,
+  "<extra_id_11>": 116,
+  "<extra_id_12>": 115,
+  "<extra_id_13>": 114,
+  "<extra_id_14>": 113,
+  "<extra_id_15>": 112,
+  "<extra_id_16>": 111,
+  "<extra_id_17>": 110,
+  "<extra_id_18>": 109,
+  "<extra_id_19>": 108,
+  "<extra_id_1>": 126,
+  "<extra_id_20>": 107,
+  "<extra_id_21>": 106,
+  "<extra_id_22>": 105,
+  "<extra_id_23>": 104,
+  "<extra_id_24>": 103,
+  "<extra_id_25>": 102,
+  "<extra_id_26>": 101,
+  "<extra_id_27>": 100,
+  "<extra_id_28>": 99,
+  "<extra_id_29>": 98,
+  "<extra_id_2>": 125,
+  "<extra_id_30>": 97,
+  "<extra_id_31>": 96,
+  "<extra_id_32>": 95,
+  "<extra_id_33>": 94,
+  "<extra_id_34>": 93,
+  "<extra_id_35>": 92,
+  "<extra_id_36>": 91,
+  "<extra_id_37>": 90,
+  "<extra_id_38>": 89,
+  "<extra_id_39>": 88,
+  "<extra_id_3>": 124,
+  "<extra_id_40>": 87,
+  "<extra_id_41>": 86,
+  "<extra_id_42>": 85,
+  "<extra_id_43>": 84,
+  "<extra_id_44>": 83,
+  "<extra_id_45>": 82,
+  "<extra_id_46>": 81,
+  "<extra_id_47>": 80,
+  "<extra_id_48>": 79,
+  "<extra_id_49>": 78,
+  "<extra_id_4>": 123,
+  "<extra_id_50>": 77,
+  "<extra_id_51>": 76,
+  "<extra_id_52>": 75,
+  "<extra_id_53>": 74,
+  "<extra_id_54>": 73,
+  "<extra_id_55>": 72,
+  "<extra_id_56>": 71,
+  "<extra_id_57>": 70,
+  "<extra_id_58>": 69,
+  "<extra_id_59>": 68,
+  "<extra_id_5>": 122,
+  "<extra_id_60>": 67,
+  "<extra_id_61>": 66,
+  "<extra_id_62>": 65,
+  "<extra_id_63>": 64,
+  "<extra_id_64>": 63,
+  "<extra_id_65>": 62,
+  "<extra_id_66>": 61,
+  "<extra_id_67>": 60,
+  "<extra_id_68>": 59,
+  "<extra_id_69>": 58,
+  "<extra_id_6>": 121,
+  "<extra_id_70>": 57,
+  "<extra_id_71>": 56,
+  "<extra_id_72>": 55,
+  "<extra_id_73>": 54,
+  "<extra_id_74>": 53,
+  "<extra_id_75>": 52,
+  "<extra_id_76>": 51,
+  "<extra_id_77>": 50,
+  "<extra_id_78>": 49,
+  "<extra_id_79>": 48,
+  "<extra_id_7>": 120,
+  "<extra_id_80>": 47,
+  "<extra_id_81>": 46,
+  "<extra_id_82>": 45,
+  "<extra_id_83>": 44,
+  "<extra_id_84>": 43,
+  "<extra_id_85>": 42,
+  "<extra_id_86>": 41,
+  "<extra_id_87>": 40,
+  "<extra_id_88>": 39,
+  "<extra_id_89>": 38,
+  "<extra_id_8>": 119,
+  "<extra_id_90>": 37,
+  "<extra_id_91>": 36,
+  "<extra_id_92>": 35,
+  "<extra_id_93>": 34,
+  "<extra_id_94>": 33,
+  "<extra_id_95>": 32,
+  "<extra_id_96>": 31,
+  "<extra_id_97>": 30,
+  "<extra_id_98>": 29,
+  "<extra_id_99>": 28,
+  "<extra_id_9>": 118
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..af1c864b04cc7849b7c1b65f535655a300ecbcc5
--- /dev/null
+++ b/config.json
@@ -0,0 +1,8 @@
+{
+  "architectures": [
+    "T5BinaryClassifier"
+  ],
+  "model_type": "t5",
+  "d_model": 1024,
+  "is_encoder_decoder": false
+}
\ No newline at end of file
diff --git a/finetuning_bc_prott5.py b/finetuning_bc_prott5.py
new file mode 100644
index 0000000000000000000000000000000000000000..25f436bde3fd6bd925a036dbd5adac6cf0f320ae
--- /dev/null
+++ b/finetuning_bc_prott5.py
@@ -0,0 +1,149 @@
+import torch, torch.nn as nn
+from transformers import (T5EncoderModel, T5Tokenizer,
+                          Trainer, TrainingArguments)
+from transformers.modeling_outputs import SequenceClassifierOutput
+from datasets import load_dataset
+from sklearn.metrics import accuracy_score
+import pandas as pd
+import wandb
+from huggingface_hub import login
+import re
+from datasets import Dataset
+
+# ---------------------------
+# 1. I/O AND LOGIN
+# ---------------------------
+
+wandb.login()  # authenticate with Weights & Biases
+wandb.init(project='finetuning-bc-protT5')  # start the run under this W&B project
+
+# ---------------------------
+# 2. DATA HAZIRLIK (seninkiler)
+# ---------------------------
+data = pd.read_csv("ready_to_train.csv")
+pos = data.loc[data["SITE_+/-7_AA"].str.len()==15]["SITE_+/-7_AA"].tolist()
+neg = data.loc[data["NON_PH_SITE"].str.len()==15]["NON_PH_SITE"].tolist()
+labels = [1]*len(pos)+[0]*len(neg)
+texts  = pos+neg
+prep_texts = [" ".join(list(t.upper())) for t in texts]
+prep_texts = [re.sub(r"[UZOB]", "X", pt).replace("_","-")for pt in prep_texts]
+ 
+
+from sklearn.model_selection import train_test_split
+X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)  # 70% train, 30% held out
+X_val, X_test, y_val, y_test     = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)  # held-out part halved: validation / test
+# NOTE(review): neither split passes `stratify`, so label ratios may differ slightly between splits.
+tokenizer = T5Tokenizer.from_pretrained("Rostlab/prot_t5_xl_uniref50")
+
+def tokenize(batch):
+    return tokenizer(batch["text"],
+                     padding="max_length",
+                     truncation=True,
+                     max_length=64)
+
+def to_hf_dataset(texts, labels):
+    return {"text": texts, "label": labels}
+
+train_ds = Dataset.from_dict({"text": X_train, "label": y_train})
+val_ds   = Dataset.from_dict({"text": X_val,   "label": y_val})
+
+train_ds = train_ds.map(tokenize, batched=True).with_format("torch")
+val_ds   = val_ds.map(tokenize,   batched=True).with_format("torch")
+
+
+
+# ---------------------------
+# 3. MODEL: T5 + Classification Head
+# ---------------------------
+class T5BinaryClassifier(nn.Module):
+    def __init__(self, model_name, dropout=0.1):
+        super().__init__()
+        self.encoder = T5EncoderModel.from_pretrained(model_name)
+        enc_dim      = self.encoder.config.d_model       # 1024 (prot_t5_xl)
+        self.dropout = nn.Dropout(dropout)
+        self.cls     = nn.Linear(enc_dim, 2)             # binary
+
+    def forward(self,
+                input_ids=None,
+                attention_mask=None,
+                labels=None,
+                **kwargs):
+        enc_out = self.encoder(input_ids=input_ids,
+                               attention_mask=attention_mask,
+                               return_dict=True)
+        # [CLS]-benzeri vektör: <pad> token pozisyonu (id=0) yerine mean‑pool
+        hidden = enc_out.last_hidden_state        # (B, L, D)
+        pooled = hidden.mean(dim=1)               # (B, D)
+
+        logits = self.cls(self.dropout(pooled))
+
+        loss = None
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss()
+            loss = loss_fct(logits, labels)
+
+        return SequenceClassifierOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=enc_out.hidden_states,
+            attentions=enc_out.attentions,
+        )
+
+model = T5BinaryClassifier("Rostlab/prot_t5_xl_uniref50").cuda()  # build the classifier and move it to the GPU
+
+# ---------------------------
+# 4. TRAINING ARGUMENTS
+# ---------------------------
+args = TrainingArguments(
+    output_dir="t5-bc-out",
+    num_train_epochs=3,
+    learning_rate=5e-5,
+    per_device_train_batch_size=8,     # prot_t5_xl büyük; 512 yerine 8‑16 önerilir
+    per_device_eval_batch_size=8,
+    gradient_accumulation_steps=4,     # efektif 32
+    evaluation_strategy="epoch",
+    load_best_model_at_end=True,
+    save_strategy="epoch",
+    save_safetensors=False, 
+    report_to=["wandb"],
+    fp16=True,
+)
+
+def compute_metrics(eval_pred):
+    logits, labels = eval_pred
+    preds = logits.argmax(-1)
+    acc  = accuracy_score(labels, preds)
+    return {"accuracy": acc}
+
+trainer = Trainer(
+    model=model,
+    args=args,
+    train_dataset=train_ds,
+    eval_dataset=val_ds,
+    compute_metrics=compute_metrics,
+)
+# Fine-tune; evaluation and checkpoint saving follow the per-epoch strategies set in `args`.
+trainer.train()
+
+# ---------------------------
+# 5. TEST & SAVE
+# ---------------------------
+
+# Python dict → Hugging Face Dataset
+test_ds = Dataset.from_dict({"text": X_test, "label": y_test})
+
+# Tokenize ve tensor formatına çevir
+test_ds = test_ds.map(tokenize, batched=True).with_format("torch")
+
+metrics  = trainer.evaluate(test_ds)
+print(metrics)
+# ---- Manuel kaydetme ----
+trainer.save_model(
+    "/arf/scratch/zisik/prott5_bc_ft"
+)
+tokenizer.save_pretrained("/arf/scratch/zisik/prott5_bc_ft")
+
+
+#model.push_to_hub("isikz/prot_t5_binary_classifier")
+#tokenizer.push_to_hub("isikz/prot_t5_binary_classifier")
+#wandb.finish()
diff --git a/pytorch_model.bin b/pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0a92b054ee34321d706fdba0e59cff8e1a6945bb
--- /dev/null
+++ b/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb84e54c51f53eb1a49e0d52446d9e470b5ea320ae7174917832ab5aef4d31a2
+size 4832674810
diff --git a/ready_to_train.csv b/ready_to_train.csv
new file mode 100644
index 0000000000000000000000000000000000000000..6ccefd360c81076a54229466d245766ab20959ce
--- /dev/null
+++ b/ready_to_train.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:498eaceee30faf2510396e17a4f8417ce65c37e576c8792a80da432313f03c0e
+size 18584710
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..17ade346a1042cbe0c1436f5bedcbd85c099d582
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,125 @@
+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/spiece.model b/spiece.model
new file mode 100644
index 0000000000000000000000000000000000000000..6c3f9cd64e8288f59fac6b5ad7c85cbc17938ffd
--- /dev/null
+++ b/spiece.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74da7b4afcde53faa570114b530c726135bdfcdb813dec3abfb27f9d44db7324
+size 237990
diff --git a/t5-bc-out/checkpoint-47916/optimizer.pt b/t5-bc-out/checkpoint-47916/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b5af208c2eab52b5ee380279553fdfaae416815
--- /dev/null
+++ b/t5-bc-out/checkpoint-47916/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3923cb1c3204d99805be4282d57866443cbdd1f5f71ad6af1c81ee4a783d7e9d
+size 9665321730
diff --git a/t5-bc-out/checkpoint-47916/pytorch_model.bin b/t5-bc-out/checkpoint-47916/pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6e2f3cad7a1e11f71e29927be70fe6e98f2aa55e
--- /dev/null
+++ b/t5-bc-out/checkpoint-47916/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80957033108061961f1d326abe9e2829f4d78524a478d52ecec37db106fbe5cc
+size 4832674810
diff --git a/t5-bc-out/checkpoint-47916/rng_state.pth b/t5-bc-out/checkpoint-47916/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d8b71d0fca00d67f9c6d59efc1fffbd5a7d79baa
--- /dev/null
+++ b/t5-bc-out/checkpoint-47916/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1af688f89b64a7c9246d9d5848b03b2543dd68c97861fab57333014cd508ec2
+size 14244
diff --git a/t5-bc-out/checkpoint-47916/scheduler.pt b/t5-bc-out/checkpoint-47916/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..23be9ed08525013187375982b1d0445c0d3cc932
--- /dev/null
+++ b/t5-bc-out/checkpoint-47916/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62074fe1abf3e8558aec193d31cdd76f6c2650659b0c8d62d4b5ff6d20fd6edd
+size 1064
diff --git a/t5-bc-out/checkpoint-47916/trainer_state.json b/t5-bc-out/checkpoint-47916/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..2137097bc4315cb69634f262ff91a78a5ab147ce
--- /dev/null
+++ b/t5-bc-out/checkpoint-47916/trainer_state.json
@@ -0,0 +1,725 @@
+{
+  "best_metric": 0.1829579919576645,
+  "best_model_checkpoint": "t5-bc-out/checkpoint-31944",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 47916,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03130478337089907,
+      "grad_norm": 1.3348039388656616,
+      "learning_rate": 4.947825361048502e-05,
+      "loss": 0.5856,
+      "step": 500
+    },
+    {
+      "epoch": 0.06260956674179814,
+      "grad_norm": 2.473144292831421,
+      "learning_rate": 4.8956507220970036e-05,
+      "loss": 0.5183,
+      "step": 1000
+    },
+    {
+      "epoch": 0.09391435011269722,
+      "grad_norm": 3.6210598945617676,
+      "learning_rate": 4.843476083145505e-05,
+      "loss": 0.4879,
+      "step": 1500
+    },
+    {
+      "epoch": 0.12521913348359628,
+      "grad_norm": 6.336288928985596,
+      "learning_rate": 4.791405793471909e-05,
+      "loss": 0.4579,
+      "step": 2000
+    },
+    {
+      "epoch": 0.15652391685449538,
+      "grad_norm": 2.6699299812316895,
+      "learning_rate": 4.739231154520411e-05,
+      "loss": 0.4421,
+      "step": 2500
+    },
+    {
+      "epoch": 0.18782870022539444,
+      "grad_norm": 7.918868064880371,
+      "learning_rate": 4.6870565155689124e-05,
+      "loss": 0.4205,
+      "step": 3000
+    },
+    {
+      "epoch": 0.2191334835962935,
+      "grad_norm": 2.9816083908081055,
+      "learning_rate": 4.634881876617414e-05,
+      "loss": 0.4044,
+      "step": 3500
+    },
+    {
+      "epoch": 0.25043826696719257,
+      "grad_norm": 7.581803321838379,
+      "learning_rate": 4.582707237665916e-05,
+      "loss": 0.3901,
+      "step": 4000
+    },
+    {
+      "epoch": 0.28174305033809166,
+      "grad_norm": 6.031352996826172,
+      "learning_rate": 4.5305325987144174e-05,
+      "loss": 0.3834,
+      "step": 4500
+    },
+    {
+      "epoch": 0.31304783370899075,
+      "grad_norm": 2.581623077392578,
+      "learning_rate": 4.478357959762919e-05,
+      "loss": 0.3601,
+      "step": 5000
+    },
+    {
+      "epoch": 0.3443526170798898,
+      "grad_norm": 4.7024245262146,
+      "learning_rate": 4.42618332081142e-05,
+      "loss": 0.3492,
+      "step": 5500
+    },
+    {
+      "epoch": 0.3756574004507889,
+      "grad_norm": 8.929915428161621,
+      "learning_rate": 4.374217380415728e-05,
+      "loss": 0.3435,
+      "step": 6000
+    },
+    {
+      "epoch": 0.406962183821688,
+      "grad_norm": 3.694370985031128,
+      "learning_rate": 4.32204274146423e-05,
+      "loss": 0.3366,
+      "step": 6500
+    },
+    {
+      "epoch": 0.438266967192587,
+      "grad_norm": 5.6961350440979,
+      "learning_rate": 4.2698681025127307e-05,
+      "loss": 0.3259,
+      "step": 7000
+    },
+    {
+      "epoch": 0.4695717505634861,
+      "grad_norm": 2.740339756011963,
+      "learning_rate": 4.217693463561232e-05,
+      "loss": 0.3224,
+      "step": 7500
+    },
+    {
+      "epoch": 0.5008765339343851,
+      "grad_norm": 3.7285494804382324,
+      "learning_rate": 4.165518824609734e-05,
+      "loss": 0.3103,
+      "step": 8000
+    },
+    {
+      "epoch": 0.5321813173052843,
+      "grad_norm": 5.1480326652526855,
+      "learning_rate": 4.1133441856582356e-05,
+      "loss": 0.3107,
+      "step": 8500
+    },
+    {
+      "epoch": 0.5634861006761833,
+      "grad_norm": 4.8817620277404785,
+      "learning_rate": 4.0611695467067366e-05,
+      "loss": 0.2945,
+      "step": 9000
+    },
+    {
+      "epoch": 0.5947908840470824,
+      "grad_norm": 5.003459453582764,
+      "learning_rate": 4.008994907755238e-05,
+      "loss": 0.2903,
+      "step": 9500
+    },
+    {
+      "epoch": 0.6260956674179815,
+      "grad_norm": 6.451533317565918,
+      "learning_rate": 3.95682026880374e-05,
+      "loss": 0.284,
+      "step": 10000
+    },
+    {
+      "epoch": 0.6574004507888805,
+      "grad_norm": 7.442136287689209,
+      "learning_rate": 3.9046456298522416e-05,
+      "loss": 0.276,
+      "step": 10500
+    },
+    {
+      "epoch": 0.6887052341597796,
+      "grad_norm": 3.617513656616211,
+      "learning_rate": 3.852575340178646e-05,
+      "loss": 0.27,
+      "step": 11000
+    },
+    {
+      "epoch": 0.7200100175306787,
+      "grad_norm": 5.776317596435547,
+      "learning_rate": 3.800400701227148e-05,
+      "loss": 0.2666,
+      "step": 11500
+    },
+    {
+      "epoch": 0.7513148009015778,
+      "grad_norm": 6.264099597930908,
+      "learning_rate": 3.7482260622756494e-05,
+      "loss": 0.257,
+      "step": 12000
+    },
+    {
+      "epoch": 0.7826195842724768,
+      "grad_norm": 4.222651481628418,
+      "learning_rate": 3.6960514233241504e-05,
+      "loss": 0.2566,
+      "step": 12500
+    },
+    {
+      "epoch": 0.813924367643376,
+      "grad_norm": 6.953704833984375,
+      "learning_rate": 3.643876784372652e-05,
+      "loss": 0.2502,
+      "step": 13000
+    },
+    {
+      "epoch": 0.845229151014275,
+      "grad_norm": 3.2264351844787598,
+      "learning_rate": 3.591806494699057e-05,
+      "loss": 0.2364,
+      "step": 13500
+    },
+    {
+      "epoch": 0.876533934385174,
+      "grad_norm": 6.233669281005859,
+      "learning_rate": 3.539631855747558e-05,
+      "loss": 0.2451,
+      "step": 14000
+    },
+    {
+      "epoch": 0.9078387177560732,
+      "grad_norm": 8.540342330932617,
+      "learning_rate": 3.48745721679606e-05,
+      "loss": 0.2364,
+      "step": 14500
+    },
+    {
+      "epoch": 0.9391435011269722,
+      "grad_norm": 4.3881516456604,
+      "learning_rate": 3.4352825778445616e-05,
+      "loss": 0.2312,
+      "step": 15000
+    },
+    {
+      "epoch": 0.9704482844978712,
+      "grad_norm": 6.7153167724609375,
+      "learning_rate": 3.383107938893063e-05,
+      "loss": 0.2323,
+      "step": 15500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9204725991125071,
+      "eval_loss": 0.2026778757572174,
+      "eval_runtime": 180.0542,
+      "eval_samples_per_second": 608.272,
+      "eval_steps_per_second": 76.038,
+      "step": 15972
+    },
+    {
+      "epoch": 1.0017530678687703,
+      "grad_norm": 4.329936504364014,
+      "learning_rate": 3.331037649219468e-05,
+      "loss": 0.2163,
+      "step": 16000
+    },
+    {
+      "epoch": 1.0330578512396693,
+      "grad_norm": 8.806492805480957,
+      "learning_rate": 3.278863010267969e-05,
+      "loss": 0.139,
+      "step": 16500
+    },
+    {
+      "epoch": 1.0643626346105686,
+      "grad_norm": 9.733407020568848,
+      "learning_rate": 3.226688371316471e-05,
+      "loss": 0.1419,
+      "step": 17000
+    },
+    {
+      "epoch": 1.0956674179814676,
+      "grad_norm": 3.5503616333007812,
+      "learning_rate": 3.174513732364972e-05,
+      "loss": 0.1361,
+      "step": 17500
+    },
+    {
+      "epoch": 1.1269722013523666,
+      "grad_norm": 5.853847503662109,
+      "learning_rate": 3.122339093413474e-05,
+      "loss": 0.1398,
+      "step": 18000
+    },
+    {
+      "epoch": 1.1582769847232657,
+      "grad_norm": 1.6936904191970825,
+      "learning_rate": 3.0701644544619754e-05,
+      "loss": 0.1373,
+      "step": 18500
+    },
+    {
+      "epoch": 1.1895817680941647,
+      "grad_norm": 1.5299335718154907,
+      "learning_rate": 3.017989815510477e-05,
+      "loss": 0.1423,
+      "step": 19000
+    },
+    {
+      "epoch": 1.220886551465064,
+      "grad_norm": 3.899322986602783,
+      "learning_rate": 2.965815176558978e-05,
+      "loss": 0.1391,
+      "step": 19500
+    },
+    {
+      "epoch": 1.252191334835963,
+      "grad_norm": 2.3118438720703125,
+      "learning_rate": 2.913744886885383e-05,
+      "loss": 0.1408,
+      "step": 20000
+    },
+    {
+      "epoch": 1.283496118206862,
+      "grad_norm": 0.6930440068244934,
+      "learning_rate": 2.8615702479338845e-05,
+      "loss": 0.1408,
+      "step": 20500
+    },
+    {
+      "epoch": 1.314800901577761,
+      "grad_norm": 2.851909875869751,
+      "learning_rate": 2.8093956089823858e-05,
+      "loss": 0.1404,
+      "step": 21000
+    },
+    {
+      "epoch": 1.3461056849486601,
+      "grad_norm": 0.22848767042160034,
+      "learning_rate": 2.7572209700308875e-05,
+      "loss": 0.1382,
+      "step": 21500
+    },
+    {
+      "epoch": 1.3774104683195592,
+      "grad_norm": 3.973886489868164,
+      "learning_rate": 2.7050463310793888e-05,
+      "loss": 0.1396,
+      "step": 22000
+    },
+    {
+      "epoch": 1.4087152516904582,
+      "grad_norm": 3.140080451965332,
+      "learning_rate": 2.6529760414057936e-05,
+      "loss": 0.127,
+      "step": 22500
+    },
+    {
+      "epoch": 1.4400200350613575,
+      "grad_norm": 5.468123435974121,
+      "learning_rate": 2.6008014024542953e-05,
+      "loss": 0.1276,
+      "step": 23000
+    },
+    {
+      "epoch": 1.4713248184322565,
+      "grad_norm": 0.626640260219574,
+      "learning_rate": 2.5486267635027966e-05,
+      "loss": 0.1219,
+      "step": 23500
+    },
+    {
+      "epoch": 1.5026296018031555,
+      "grad_norm": 3.1899547576904297,
+      "learning_rate": 2.496452124551298e-05,
+      "loss": 0.1319,
+      "step": 24000
+    },
+    {
+      "epoch": 1.5339343851740546,
+      "grad_norm": 3.199150562286377,
+      "learning_rate": 2.4442774855997996e-05,
+      "loss": 0.1298,
+      "step": 24500
+    },
+    {
+      "epoch": 1.5652391685449536,
+      "grad_norm": 5.129565715789795,
+      "learning_rate": 2.3921028466483013e-05,
+      "loss": 0.1217,
+      "step": 25000
+    },
+    {
+      "epoch": 1.5965439519158529,
+      "grad_norm": 4.223311424255371,
+      "learning_rate": 2.339928207696803e-05,
+      "loss": 0.1288,
+      "step": 25500
+    },
+    {
+      "epoch": 1.6278487352867517,
+      "grad_norm": 10.741965293884277,
+      "learning_rate": 2.2877535687453046e-05,
+      "loss": 0.1263,
+      "step": 26000
+    },
+    {
+      "epoch": 1.659153518657651,
+      "grad_norm": 3.0217132568359375,
+      "learning_rate": 2.235578929793806e-05,
+      "loss": 0.122,
+      "step": 26500
+    },
+    {
+      "epoch": 1.69045830202855,
+      "grad_norm": 7.847172737121582,
+      "learning_rate": 2.1835086401202104e-05,
+      "loss": 0.122,
+      "step": 27000
+    },
+    {
+      "epoch": 1.721763085399449,
+      "grad_norm": 9.223713874816895,
+      "learning_rate": 2.1313340011687117e-05,
+      "loss": 0.1266,
+      "step": 27500
+    },
+    {
+      "epoch": 1.7530678687703483,
+      "grad_norm": 2.0706963539123535,
+      "learning_rate": 2.0791593622172137e-05,
+      "loss": 0.1274,
+      "step": 28000
+    },
+    {
+      "epoch": 1.784372652141247,
+      "grad_norm": 3.1475393772125244,
+      "learning_rate": 2.0270890725436182e-05,
+      "loss": 0.1214,
+      "step": 28500
+    },
+    {
+      "epoch": 1.8156774355121463,
+      "grad_norm": 3.7348415851593018,
+      "learning_rate": 1.9749144335921196e-05,
+      "loss": 0.1191,
+      "step": 29000
+    },
+    {
+      "epoch": 1.8469822188830454,
+      "grad_norm": 3.230713129043579,
+      "learning_rate": 1.9227397946406212e-05,
+      "loss": 0.1199,
+      "step": 29500
+    },
+    {
+      "epoch": 1.8782870022539444,
+      "grad_norm": 0.4691683351993561,
+      "learning_rate": 1.8705651556891226e-05,
+      "loss": 0.1176,
+      "step": 30000
+    },
+    {
+      "epoch": 1.9095917856248434,
+      "grad_norm": 4.382262706756592,
+      "learning_rate": 1.8183905167376242e-05,
+      "loss": 0.1176,
+      "step": 30500
+    },
+    {
+      "epoch": 1.9408965689957425,
+      "grad_norm": 9.810182571411133,
+      "learning_rate": 1.7662158777861255e-05,
+      "loss": 0.1083,
+      "step": 31000
+    },
+    {
+      "epoch": 1.9722013523666417,
+      "grad_norm": 8.107538223266602,
+      "learning_rate": 1.7140412388346275e-05,
+      "loss": 0.1103,
+      "step": 31500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9478369642628878,
+      "eval_loss": 0.1829579919576645,
+      "eval_runtime": 179.9731,
+      "eval_samples_per_second": 608.547,
+      "eval_steps_per_second": 76.072,
+      "step": 31944
+    },
+    {
+      "epoch": 2.0035061357375405,
+      "grad_norm": 0.5452843308448792,
+      "learning_rate": 1.661866599883129e-05,
+      "loss": 0.1087,
+      "step": 32000
+    },
+    {
+      "epoch": 2.03481091910844,
+      "grad_norm": 1.0569943189620972,
+      "learning_rate": 1.6097963102095334e-05,
+      "loss": 0.0456,
+      "step": 32500
+    },
+    {
+      "epoch": 2.0661157024793386,
+      "grad_norm": 0.22022764384746552,
+      "learning_rate": 1.557621671258035e-05,
+      "loss": 0.0523,
+      "step": 33000
+    },
+    {
+      "epoch": 2.097420485850238,
+      "grad_norm": 9.75222396850586,
+      "learning_rate": 1.5054470323065365e-05,
+      "loss": 0.0492,
+      "step": 33500
+    },
+    {
+      "epoch": 2.128725269221137,
+      "grad_norm": 3.1281306743621826,
+      "learning_rate": 1.453272393355038e-05,
+      "loss": 0.0498,
+      "step": 34000
+    },
+    {
+      "epoch": 2.160030052592036,
+      "grad_norm": 0.012396792881190777,
+      "learning_rate": 1.4012021036814427e-05,
+      "loss": 0.0506,
+      "step": 34500
+    },
+    {
+      "epoch": 2.191334835962935,
+      "grad_norm": 6.527154922485352,
+      "learning_rate": 1.3490274647299442e-05,
+      "loss": 0.0569,
+      "step": 35000
+    },
+    {
+      "epoch": 2.222639619333834,
+      "grad_norm": 3.5429670810699463,
+      "learning_rate": 1.2968528257784457e-05,
+      "loss": 0.0548,
+      "step": 35500
+    },
+    {
+      "epoch": 2.2539444027047333,
+      "grad_norm": 1.333369255065918,
+      "learning_rate": 1.2446781868269472e-05,
+      "loss": 0.0558,
+      "step": 36000
+    },
+    {
+      "epoch": 2.2852491860756325,
+      "grad_norm": 0.10260029882192612,
+      "learning_rate": 1.1926078971533518e-05,
+      "loss": 0.0464,
+      "step": 36500
+    },
+    {
+      "epoch": 2.3165539694465314,
+      "grad_norm": 0.14060164988040924,
+      "learning_rate": 1.1404332582018533e-05,
+      "loss": 0.0515,
+      "step": 37000
+    },
+    {
+      "epoch": 2.3478587528174306,
+      "grad_norm": 1.031032919883728,
+      "learning_rate": 1.0882586192503548e-05,
+      "loss": 0.0448,
+      "step": 37500
+    },
+    {
+      "epoch": 2.3791635361883294,
+      "grad_norm": 0.20121368765830994,
+      "learning_rate": 1.0360839802988565e-05,
+      "loss": 0.0475,
+      "step": 38000
+    },
+    {
+      "epoch": 2.4104683195592287,
+      "grad_norm": 0.06531311571598053,
+      "learning_rate": 9.84013690625261e-06,
+      "loss": 0.0522,
+      "step": 38500
+    },
+    {
+      "epoch": 2.441773102930128,
+      "grad_norm": 0.04498385637998581,
+      "learning_rate": 9.318390516737625e-06,
+      "loss": 0.0434,
+      "step": 39000
+    },
+    {
+      "epoch": 2.4730778863010268,
+      "grad_norm": 0.3482716679573059,
+      "learning_rate": 8.796644127222641e-06,
+      "loss": 0.0468,
+      "step": 39500
+    },
+    {
+      "epoch": 2.504382669671926,
+      "grad_norm": 4.0475053787231445,
+      "learning_rate": 8.274897737707656e-06,
+      "loss": 0.0505,
+      "step": 40000
+    },
+    {
+      "epoch": 2.535687453042825,
+      "grad_norm": 0.6960127353668213,
+      "learning_rate": 7.753151348192671e-06,
+      "loss": 0.0421,
+      "step": 40500
+    },
+    {
+      "epoch": 2.566992236413724,
+      "grad_norm": 0.8902493119239807,
+      "learning_rate": 7.231404958677686e-06,
+      "loss": 0.0451,
+      "step": 41000
+    },
+    {
+      "epoch": 2.5982970197846234,
+      "grad_norm": 0.46462351083755493,
+      "learning_rate": 6.710702061941732e-06,
+      "loss": 0.0522,
+      "step": 41500
+    },
+    {
+      "epoch": 2.629601803155522,
+      "grad_norm": 0.07463126629590988,
+      "learning_rate": 6.1889556724267476e-06,
+      "loss": 0.0468,
+      "step": 42000
+    },
+    {
+      "epoch": 2.660906586526421,
+      "grad_norm": 0.05138092488050461,
+      "learning_rate": 5.6672092829117625e-06,
+      "loss": 0.0429,
+      "step": 42500
+    },
+    {
+      "epoch": 2.6922113698973202,
+      "grad_norm": 0.06017659977078438,
+      "learning_rate": 5.145462893396778e-06,
+      "loss": 0.038,
+      "step": 43000
+    },
+    {
+      "epoch": 2.7235161532682195,
+      "grad_norm": 3.794154405593872,
+      "learning_rate": 4.624759996660823e-06,
+      "loss": 0.0418,
+      "step": 43500
+    },
+    {
+      "epoch": 2.7548209366391183,
+      "grad_norm": 9.929149627685547,
+      "learning_rate": 4.103013607145838e-06,
+      "loss": 0.0418,
+      "step": 44000
+    },
+    {
+      "epoch": 2.7861257200100176,
+      "grad_norm": 0.10156802833080292,
+      "learning_rate": 3.5812672176308544e-06,
+      "loss": 0.0435,
+      "step": 44500
+    },
+    {
+      "epoch": 2.8174305033809164,
+      "grad_norm": 15.590471267700195,
+      "learning_rate": 3.0595208281158697e-06,
+      "loss": 0.039,
+      "step": 45000
+    },
+    {
+      "epoch": 2.8487352867518156,
+      "grad_norm": 0.1026441678404808,
+      "learning_rate": 2.5377744386008846e-06,
+      "loss": 0.0451,
+      "step": 45500
+    },
+    {
+      "epoch": 2.880040070122715,
+      "grad_norm": 0.08782440423965454,
+      "learning_rate": 2.0160280490859004e-06,
+      "loss": 0.0408,
+      "step": 46000
+    },
+    {
+      "epoch": 2.9113448534936137,
+      "grad_norm": 17.5203857421875,
+      "learning_rate": 1.494281659570916e-06,
+      "loss": 0.0372,
+      "step": 46500
+    },
+    {
+      "epoch": 2.942649636864513,
+      "grad_norm": 0.08832889050245285,
+      "learning_rate": 9.735787628349612e-07,
+      "loss": 0.041,
+      "step": 47000
+    },
+    {
+      "epoch": 2.973954420235412,
+      "grad_norm": 10.057083129882812,
+      "learning_rate": 4.518323733199766e-07,
+      "loss": 0.0417,
+      "step": 47500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9541735906941071,
+      "eval_loss": 0.2335142344236374,
+      "eval_runtime": 176.4196,
+      "eval_samples_per_second": 620.804,
+      "eval_steps_per_second": 77.605,
+      "step": 47916
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 47916,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
diff --git a/t5-bc-out/checkpoint-47916/training_args.bin b/t5-bc-out/checkpoint-47916/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c3466fd00b79452242858a747e6bfe168409a38f
--- /dev/null
+++ b/t5-bc-out/checkpoint-47916/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:705750eb5050da7b859b299363db4324be92a3af2ba4a8530c69e964f52524d7
+size 5176
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a2b787858aa387c7d376edeef2f479d543ca4012
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,941 @@
+{
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "28": {
+      "content": "<extra_id_99>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "29": {
+      "content": "<extra_id_98>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "<extra_id_97>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "31": {
+      "content": "<extra_id_96>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32": {
+      "content": "<extra_id_95>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "33": {
+      "content": "<extra_id_94>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "34": {
+      "content": "<extra_id_93>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "35": {
+      "content": "<extra_id_92>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "36": {
+      "content": "<extra_id_91>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "37": {
+      "content": "<extra_id_90>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "38": {
+      "content": "<extra_id_89>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "39": {
+      "content": "<extra_id_88>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "40": {
+      "content": "<extra_id_87>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "41": {
+      "content": "<extra_id_86>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "42": {
+      "content": "<extra_id_85>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "43": {
+      "content": "<extra_id_84>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "44": {
+      "content": "<extra_id_83>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "45": {
+      "content": "<extra_id_82>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "46": {
+      "content": "<extra_id_81>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "47": {
+      "content": "<extra_id_80>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "48": {
+      "content": "<extra_id_79>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "49": {
+      "content": "<extra_id_78>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "50": {
+      "content": "<extra_id_77>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "51": {
+      "content": "<extra_id_76>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "52": {
+      "content": "<extra_id_75>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "53": {
+      "content": "<extra_id_74>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "54": {
+      "content": "<extra_id_73>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "55": {
+      "content": "<extra_id_72>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "56": {
+      "content": "<extra_id_71>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "57": {
+      "content": "<extra_id_70>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "58": {
+      "content": "<extra_id_69>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "59": {
+      "content": "<extra_id_68>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "60": {
+      "content": "<extra_id_67>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "61": {
+      "content": "<extra_id_66>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "62": {
+      "content": "<extra_id_65>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "63": {
+      "content": "<extra_id_64>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "64": {
+      "content": "<extra_id_63>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "65": {
+      "content": "<extra_id_62>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "66": {
+      "content": "<extra_id_61>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "67": {
+      "content": "<extra_id_60>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "68": {
+      "content": "<extra_id_59>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "69": {
+      "content": "<extra_id_58>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "70": {
+      "content": "<extra_id_57>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "71": {
+      "content": "<extra_id_56>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "72": {
+      "content": "<extra_id_55>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "73": {
+      "content": "<extra_id_54>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "74": {
+      "content": "<extra_id_53>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "75": {
+      "content": "<extra_id_52>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "76": {
+      "content": "<extra_id_51>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "77": {
+      "content": "<extra_id_50>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "78": {
+      "content": "<extra_id_49>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "79": {
+      "content": "<extra_id_48>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "80": {
+      "content": "<extra_id_47>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "81": {
+      "content": "<extra_id_46>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "82": {
+      "content": "<extra_id_45>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "83": {
+      "content": "<extra_id_44>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "84": {
+      "content": "<extra_id_43>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "85": {
+      "content": "<extra_id_42>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "86": {
+      "content": "<extra_id_41>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "87": {
+      "content": "<extra_id_40>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "88": {
+      "content": "<extra_id_39>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "89": {
+      "content": "<extra_id_38>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "90": {
+      "content": "<extra_id_37>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "91": {
+      "content": "<extra_id_36>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "92": {
+      "content": "<extra_id_35>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "93": {
+      "content": "<extra_id_34>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "94": {
+      "content": "<extra_id_33>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "95": {
+      "content": "<extra_id_32>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "96": {
+      "content": "<extra_id_31>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "97": {
+      "content": "<extra_id_30>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "98": {
+      "content": "<extra_id_29>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "99": {
+      "content": "<extra_id_28>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "<extra_id_27>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "<extra_id_26>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "<extra_id_25>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "<extra_id_24>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "104": {
+      "content": "<extra_id_23>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "105": {
+      "content": "<extra_id_22>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "106": {
+      "content": "<extra_id_21>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "<extra_id_20>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "108": {
+      "content": "<extra_id_19>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "109": {
+      "content": "<extra_id_18>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "110": {
+      "content": "<extra_id_17>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "111": {
+      "content": "<extra_id_16>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "112": {
+      "content": "<extra_id_15>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "113": {
+      "content": "<extra_id_14>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "114": {
+      "content": "<extra_id_13>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "115": {
+      "content": "<extra_id_12>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "116": {
+      "content": "<extra_id_11>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "117": {
+      "content": "<extra_id_10>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "118": {
+      "content": "<extra_id_9>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "119": {
+      "content": "<extra_id_8>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "120": {
+      "content": "<extra_id_7>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "121": {
+      "content": "<extra_id_6>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "122": {
+      "content": "<extra_id_5>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "123": {
+      "content": "<extra_id_4>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "124": {
+      "content": "<extra_id_3>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "125": {
+      "content": "<extra_id_2>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "126": {
+      "content": "<extra_id_1>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "127": {
+      "content": "<extra_id_0>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}
diff --git a/training_args.bin b/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c3466fd00b79452242858a747e6bfe168409a38f
--- /dev/null
+++ b/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:705750eb5050da7b859b299363db4324be92a3af2ba4a8530c69e964f52524d7
+size 5176
diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..8f66fe3c7ae770c0e93c28ce15a95a46c40e21af
--- /dev/null
+++ b/wandb/debug-internal.log
@@ -0,0 +1,21 @@
+{"time":"2025-05-04T17:25:03.375857654+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T17:25:03.375905253+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log"}
+{"time":"2025-05-04T17:25:03.501241143+03:00","level":"INFO","msg":"created new stream","id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501294637+03:00","level":"INFO","msg":"stream: started","id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501448652+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501451145+03:00","level":"INFO","msg":"handler: started","stream_id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501574427+03:00","level":"INFO","msg":"sender: started","stream_id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.865922055+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T22:47:43.191425732+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
+{"time":"2025-05-05T00:01:47.351449692+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
+{"time":"2025-05-05T00:49:32.57779148+03:00","level":"INFO","msg":"stream: closing","id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:32.577842715+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-05T00:49:32.578849729+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-05T00:49:32.781968337+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-05T00:49:32.781997123+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-05T00:49:32.782008311+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-05T00:49:33.357099059+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-05T00:49:33.741524339+03:00","level":"INFO","msg":"handler: closed","stream_id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:33.741583153+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:33.741593811+03:00","level":"INFO","msg":"sender: closed","stream_id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:33.741652369+03:00","level":"INFO","msg":"stream: closed","id":"0ictlmwf"}
diff --git a/wandb/debug.log b/wandb/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..627abd37727afa0dddc772a5f08d1d451156833a
--- /dev/null
+++ b/wandb/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Configure stats pid to 3189710
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug.log
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():619] calling init triggers
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():669] starting backend
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 17:25:03,371 INFO    MainThread:3189710 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 17:25:03,371 INFO    MainThread:3189710 [wandb_init.py:init():686] backend started and connected
+2025-05-04 17:25:03,379 INFO    MainThread:3189710 [wandb_init.py:init():781] updated telemetry
+2025-05-04 17:25:03,382 INFO    MainThread:3189710 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 17:25:03,852 INFO    MainThread:3189710 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 17:25:05,277 INFO    MainThread:3189710 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 17:25:05,278 INFO    MainThread:3189710 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 17:25:05,278 INFO    MainThread:3189710 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 17:25:05,278 INFO    MainThread:3189710 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 17:25:05,283 INFO    MainThread:3189710 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 17:25:53,069 INFO    MainThread:3189710 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_17-25-43_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-05 00:49:32,578 WARNING MsgRouterThr:3189710 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_132610-pxg645u5/files/config.yaml b/wandb/run-20250504_132610-pxg645u5/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7e7549dbe318b236ac4d168d1610ec259f3f67e0
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/files/config.yaml
@@ -0,0 +1,44 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m: []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 23
+                - 55
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "12": 0.18.7
+            "13": linux-x86_64
diff --git a/wandb/run-20250504_132610-pxg645u5/files/output.log b/wandb/run-20250504_132610-pxg645u5/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..f32d8969878a7a0628870456700492bec8448c62
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/files/output.log
@@ -0,0 +1,37 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
+    train_ds = load_dataset("json", data_files={"train": "-"},
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
+    builder_instance = load_dataset_builder(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
+    dataset_module = dataset_module_factory(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
+    ).get_module()
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
+    data_files = DataFilesDict.from_patterns(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
+    else DataFilesList.from_patterns(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
+    resolve_pattern(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
+    raise FileNotFoundError(error_msg)
+FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
+    train_ds = load_dataset("json", data_files={"train": "-"},
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
+    builder_instance = load_dataset_builder(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
+    dataset_module = dataset_module_factory(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
+    ).get_module()
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
+    data_files = DataFilesDict.from_patterns(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
+    else DataFilesList.from_patterns(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
+    resolve_pattern(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
+    raise FileNotFoundError(error_msg)
+FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
diff --git a/wandb/run-20250504_132610-pxg645u5/files/requirements.txt b/wandb/run-20250504_132610-pxg645u5/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json b/wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..448328b179970362f2471973f31fb58da4f76b55
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T10:26:10.053836Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274767593472"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746613538",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027932",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746354338",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027932",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3156950",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json b/wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..abe7f35e04106235b4471ed10391e2de502bf8a5
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":6}}
\ No newline at end of file
diff --git a/wandb/run-20250504_132610-pxg645u5/logs/debug-core.log b/wandb/run-20250504_132610-pxg645u5/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..d8927ec645e582bb16b497af54aed2f51506dd14
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T13:26:09.392354119+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmppack6571/port-3156976.txt","pid":3156976,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T13:26:09.392402628+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T13:26:09.393200765+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36685,"Zone":""}}
+{"time":"2025-05-04T13:26:09.393299078+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3156976}
+{"time":"2025-05-04T13:26:09.570123715+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:10.055349971+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"pxg645u5","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:10.180212249+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pxg645u5","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:16.993053475+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:16.994546738+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T13:26:16.993862146+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:16.994899765+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:17.953982632+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:17.954000039+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:37852"}
+{"time":"2025-05-04T13:26:17.954015604+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log b/wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..90be0a8f62ab298af46980179616b6b5c91f3e29
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T13:26:10.056874799+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T13:26:10.056920353+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-core.log"}
+{"time":"2025-05-04T13:26:10.180146537+03:00","level":"INFO","msg":"created new stream","id":"pxg645u5"}
+{"time":"2025-05-04T13:26:10.180200098+03:00","level":"INFO","msg":"stream: started","id":"pxg645u5"}
+{"time":"2025-05-04T13:26:10.180372555+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"pxg645u5"}
+{"time":"2025-05-04T13:26:10.180478207+03:00","level":"INFO","msg":"sender: started","stream_id":"pxg645u5"}
+{"time":"2025-05-04T13:26:10.18057531+03:00","level":"INFO","msg":"handler: started","stream_id":"pxg645u5"}
+{"time":"2025-05-04T13:26:10.587540794+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T13:26:16.993666261+03:00","level":"INFO","msg":"stream: closing","id":"pxg645u5"}
+{"time":"2025-05-04T13:26:16.993748173+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T13:26:16.995793958+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T13:26:17.198876326+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T13:26:17.198909473+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T13:26:17.198920913+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T13:26:17.694743818+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T13:26:17.953755664+03:00","level":"INFO","msg":"handler: closed","stream_id":"pxg645u5"}
+{"time":"2025-05-04T13:26:17.953802728+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"pxg645u5"}
+{"time":"2025-05-04T13:26:17.953828101+03:00","level":"INFO","msg":"sender: closed","stream_id":"pxg645u5"}
+{"time":"2025-05-04T13:26:17.953904675+03:00","level":"INFO","msg":"stream: closed","id":"pxg645u5"}
diff --git a/wandb/run-20250504_132610-pxg645u5/logs/debug.log b/wandb/run-20250504_132610-pxg645u5/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..468c0395d71efd915d75073afc6774b985f26212
--- /dev/null
+++ b/wandb/run-20250504_132610-pxg645u5/logs/debug.log
@@ -0,0 +1,26 @@
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Configure stats pid to 3156976
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 13:26:10,046 INFO    MainThread:3156976 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug.log
+2025-05-04 13:26:10,047 INFO    MainThread:3156976 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log
+2025-05-04 13:26:10,047 INFO    MainThread:3156976 [wandb_init.py:init():619] calling init triggers
+2025-05-04 13:26:10,047 INFO    MainThread:3156976 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 13:26:10,047 INFO    MainThread:3156976 [wandb_init.py:init():669] starting backend
+2025-05-04 13:26:10,047 INFO    MainThread:3156976 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 13:26:10,052 INFO    MainThread:3156976 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 13:26:10,053 INFO    MainThread:3156976 [wandb_init.py:init():686] backend started and connected
+2025-05-04 13:26:10,061 INFO    MainThread:3156976 [wandb_init.py:init():781] updated telemetry
+2025-05-04 13:26:10,064 INFO    MainThread:3156976 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 13:26:10,574 INFO    MainThread:3156976 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 13:26:12,208 INFO    MainThread:3156976 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 13:26:12,209 INFO    MainThread:3156976 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 13:26:12,209 INFO    MainThread:3156976 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 13:26:12,209 INFO    MainThread:3156976 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 13:26:12,220 INFO    MainThread:3156976 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 13:26:16,995 WARNING MsgRouterThr:3156976 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb b/wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..ebcf26b6563d253be1738d7c6c5bd6f413bdaf9a
Binary files /dev/null and b/wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb differ
diff --git a/wandb/run-20250504_132912-1agsw1y8/files/config.yaml b/wandb/run-20250504_132912-1agsw1y8/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..89a4f38c983e370e131179dcc4d572a4d25e65b6
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/files/config.yaml
@@ -0,0 +1,374 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m:
+            - "1": train/epoch
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/runtime
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/grad_norm
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/learning_rate
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/steps_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/accuracy
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 6
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 7
+                - 23
+                - 55
+                - 66
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.7
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+auto_find_batch_size:
+    value: false
+batch_eval_metrics:
+    value: false
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_train:
+    value: false
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: null
+eval_strategy:
+    value: epoch
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: epoch
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+gradient_accumulation_steps:
+    value: 4
+gradient_checkpointing:
+    value: false
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+ignore_data_skip:
+    value: false
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+learning_rate:
+    value: 5e-05
+length_column_name:
+    value: length
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: t5-bc-out/runs/May04_13-33-08_kolyoz1
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 500
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+max_grad_norm:
+    value: 1
+max_steps:
+    value: -1
+metric_for_best_model:
+    value: loss
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_dir:
+    value: t5-bc-out
+overwrite_output_dir:
+    value: false
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 8
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+prediction_loss_only:
+    value: false
+push_to_hub:
+    value: false
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_unused_columns:
+    value: true
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+run_name:
+    value: t5-bc-out
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 500
+save_strategy:
+    value: epoch
+save_total_limit:
+    value: null
+seed:
+    value: 42
+skip_memory_metrics:
+    value: true
+split_batches:
+    value: null
+tf32:
+    value: null
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 0
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250504_132912-1agsw1y8/files/output.log b/wandb/run-20250504_132912-1agsw1y8/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..8ca93eec2346930dfe72e70314a1388aa43e22d8
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/files/output.log
@@ -0,0 +1,87 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 511104/511104 [00:20<00:00, 25525.81 examples/s]
+Map: 100%|██████████| 109522/109522 [00:04<00:00, 26956.64 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 13:33:14,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+ 33%|███▎      | 15972/47916 [2:22:01<4:54:49,  1.81it/s]
+{'loss': 0.6947, 'grad_norm': 0.09912440180778503, 'learning_rate': 4.947825361048502e-05, 'epoch': 0.03}
+{'loss': 0.6939, 'grad_norm': 0.23786939680576324, 'learning_rate': 4.8956507220970036e-05, 'epoch': 0.06}
+{'loss': 0.6936, 'grad_norm': 0.10555226355791092, 'learning_rate': 4.843476083145505e-05, 'epoch': 0.09}
+{'loss': 0.6935, 'grad_norm': 0.28058305382728577, 'learning_rate': 4.791301444194006e-05, 'epoch': 0.13}
+{'loss': 0.6937, 'grad_norm': 0.13599741458892822, 'learning_rate': 4.739126805242508e-05, 'epoch': 0.16}
+{'loss': 0.6935, 'grad_norm': 0.13076388835906982, 'learning_rate': 4.6869521662910095e-05, 'epoch': 0.19}
+{'loss': 0.6934, 'grad_norm': 0.1778457760810852, 'learning_rate': 4.634777527339511e-05, 'epoch': 0.22}
+{'loss': 0.6935, 'grad_norm': 0.4112167954444885, 'learning_rate': 4.582602888388012e-05, 'epoch': 0.25}
+{'loss': 0.6934, 'grad_norm': 0.1330016702413559, 'learning_rate': 4.530428249436514e-05, 'epoch': 0.28}
+{'loss': 0.6935, 'grad_norm': 0.09426847100257874, 'learning_rate': 4.478253610485016e-05, 'epoch': 0.31}
+{'loss': 0.6933, 'grad_norm': 0.3686296343803406, 'learning_rate': 4.426078971533517e-05, 'epoch': 0.34}
+{'loss': 0.6933, 'grad_norm': 0.21278153359889984, 'learning_rate': 4.373904332582019e-05, 'epoch': 0.38}
+{'loss': 0.6935, 'grad_norm': 0.23074378073215485, 'learning_rate': 4.321834042908423e-05, 'epoch': 0.41}
+{'loss': 0.6932, 'grad_norm': 0.5192509293556213, 'learning_rate': 4.269659403956925e-05, 'epoch': 0.44}
+{'loss': 0.6932, 'grad_norm': 0.07643919438123703, 'learning_rate': 4.217484765005426e-05, 'epoch': 0.47}
+{'loss': 0.6935, 'grad_norm': 0.09435634315013885, 'learning_rate': 4.1653101260539276e-05, 'epoch': 0.5}
+{'loss': 0.6932, 'grad_norm': 0.3456329107284546, 'learning_rate': 4.113239836380333e-05, 'epoch': 0.53}
+{'loss': 0.6934, 'grad_norm': 0.11689063161611557, 'learning_rate': 4.061065197428834e-05, 'epoch': 0.56}
+{'loss': 0.6934, 'grad_norm': 0.25019219517707825, 'learning_rate': 4.0088905584773355e-05, 'epoch': 0.59}
+{'loss': 0.6933, 'grad_norm': 0.12248441576957703, 'learning_rate': 3.956715919525837e-05, 'epoch': 0.63}
+{'loss': 0.6933, 'grad_norm': 0.11549345403909683, 'learning_rate': 3.9046456298522416e-05, 'epoch': 0.66}
+{'loss': 0.6934, 'grad_norm': 0.27383607625961304, 'learning_rate': 3.852470990900743e-05, 'epoch': 0.69}
+{'loss': 0.6935, 'grad_norm': 0.21311810612678528, 'learning_rate': 3.800296351949245e-05, 'epoch': 0.72}
+{'loss': 0.6933, 'grad_norm': 0.25916823744773865, 'learning_rate': 3.7481217129977466e-05, 'epoch': 0.75}
+{'loss': 0.6934, 'grad_norm': 0.13208124041557312, 'learning_rate': 3.6960514233241504e-05, 'epoch': 0.78}
+{'loss': 0.6934, 'grad_norm': 0.4182877242565155, 'learning_rate': 3.643876784372652e-05, 'epoch': 0.81}
+{'loss': 0.6933, 'grad_norm': 0.19375275075435638, 'learning_rate': 3.5917021454211544e-05, 'epoch': 0.85}
+{'loss': 0.6933, 'grad_norm': 0.1647150218486786, 'learning_rate': 3.5395275064696554e-05, 'epoch': 0.88}
+{'loss': 0.6933, 'grad_norm': 0.458692729473114, 'learning_rate': 3.48745721679606e-05, 'epoch': 0.91}
+{'loss': 0.6933, 'grad_norm': 0.24417555332183838, 'learning_rate': 3.4352825778445616e-05, 'epoch': 0.94}
+{'loss': 0.6932, 'grad_norm': 0.10788150876760483, 'learning_rate': 3.383107938893063e-05, 'epoch': 0.97}
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
+{'eval_loss': 0.6931192278862, 'eval_accuracy': 0.4992604225635032, 'eval_runtime': 182.4166, 'eval_samples_per_second': 600.395, 'eval_steps_per_second': 75.053, 'epoch': 1.0}
+    trainer.train()
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
+    return inner_training_loop(
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
+    self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
+    self._save_checkpoint(model, trial, metrics=metrics)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
+    self.save_model(output_dir, _internal_call=True)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
+    self._save(output_dir)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
+    safetensors.torch.save_file(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
+    serialize_file(_flatten(tensors), filename, metadata=metadata)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
+    raise RuntimeError(
+RuntimeError:
+            Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
+            A potential way to correctly save your model is to use `save_model`.
+            More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
+
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
+    trainer.train()
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
+    return inner_training_loop(
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
+    self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
+    self._save_checkpoint(model, trial, metrics=metrics)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
+    self.save_model(output_dir, _internal_call=True)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
+    self._save(output_dir)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
+    safetensors.torch.save_file(
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
+    serialize_file(_flatten(tensors), filename, metadata=metadata)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
+    raise RuntimeError(
+RuntimeError:
+            Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
+            A potential way to correctly save your model is to use `save_model`.
+            More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
+
diff --git a/wandb/run-20250504_132912-1agsw1y8/files/requirements.txt b/wandb/run-20250504_132912-1agsw1y8/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json b/wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..e27daebbacd3a1b6062dd305d598e9d1014c3f16
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T10:29:13.019628Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274768302080"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746613727",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027934",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746354527",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027934",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3157550",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json b/wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..030bbea79bd4d5fc3ae46d01de3f64e2d7ead2c3
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json
@@ -0,0 +1 @@
+{"train/learning_rate":3.383107938893063e-05,"train/global_step":15972,"eval/steps_per_second":75.053,"_timestamp":1.7463635035359182e+09,"eval/accuracy":0.4992604225635032,"_step":31,"eval/loss":0.6931192278862,"train/grad_norm":0.10788150876760483,"train/epoch":1,"_wandb":{"runtime":8950},"_runtime":8950.516897928,"train/loss":0.6932,"eval/runtime":182.4166,"eval/samples_per_second":600.395}
\ No newline at end of file
diff --git a/wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log b/wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..dad0e4abf15eab93aed95168c60fe6412f76a17e
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T13:29:12.35887463+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp1u83hfoi/port-3157577.txt","pid":3157577,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T13:29:12.358923345+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T13:29:12.35977753+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45947,"Zone":""}}
+{"time":"2025-05-04T13:29:12.359879073+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3157577}
+{"time":"2025-05-04T13:29:12.546636547+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T13:29:13.02161239+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T13:29:13.145638422+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T15:58:23.607250248+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T15:58:23.607435128+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T15:58:23.607401252+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T15:58:23.607720003+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T15:58:24.801882716+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T15:58:24.801915389+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34718"}
+{"time":"2025-05-04T15:58:24.801937893+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log b/wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..3e1b9c9e1960c66d21bac86084b75cecf9a700d0
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T13:29:13.023253759+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T13:29:13.023302807+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log"}
+{"time":"2025-05-04T13:29:13.145570529+03:00","level":"INFO","msg":"created new stream","id":"1agsw1y8"}
+{"time":"2025-05-04T13:29:13.145625833+03:00","level":"INFO","msg":"stream: started","id":"1agsw1y8"}
+{"time":"2025-05-04T13:29:13.145806528+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"1agsw1y8"}
+{"time":"2025-05-04T13:29:13.145923955+03:00","level":"INFO","msg":"handler: started","stream_id":"1agsw1y8"}
+{"time":"2025-05-04T13:29:13.146011145+03:00","level":"INFO","msg":"sender: started","stream_id":"1agsw1y8"}
+{"time":"2025-05-04T13:29:13.51656923+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T15:58:23.607363166+03:00","level":"INFO","msg":"stream: closing","id":"1agsw1y8"}
+{"time":"2025-05-04T15:58:23.607412721+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T15:58:23.608736938+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T15:58:23.995834762+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T15:58:23.995863601+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T15:58:23.995874256+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T15:58:24.53730388+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T15:58:24.801427373+03:00","level":"INFO","msg":"handler: closed","stream_id":"1agsw1y8"}
+{"time":"2025-05-04T15:58:24.801476891+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"1agsw1y8"}
+{"time":"2025-05-04T15:58:24.801525233+03:00","level":"INFO","msg":"sender: closed","stream_id":"1agsw1y8"}
+{"time":"2025-05-04T15:58:24.801589463+03:00","level":"INFO","msg":"stream: closed","id":"1agsw1y8"}
diff --git a/wandb/run-20250504_132912-1agsw1y8/logs/debug.log b/wandb/run-20250504_132912-1agsw1y8/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..ea208d69901f2374562663d7c34e15b09373c8f9
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/logs/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Configure stats pid to 3157577
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 13:29:13,013 INFO    MainThread:3157577 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug.log
+2025-05-04 13:29:13,014 INFO    MainThread:3157577 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log
+2025-05-04 13:29:13,014 INFO    MainThread:3157577 [wandb_init.py:init():619] calling init triggers
+2025-05-04 13:29:13,014 INFO    MainThread:3157577 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 13:29:13,014 INFO    MainThread:3157577 [wandb_init.py:init():669] starting backend
+2025-05-04 13:29:13,014 INFO    MainThread:3157577 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 13:29:13,018 INFO    MainThread:3157577 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 13:29:13,019 INFO    MainThread:3157577 [wandb_init.py:init():686] backend started and connected
+2025-05-04 13:29:13,026 INFO    MainThread:3157577 [wandb_init.py:init():781] updated telemetry
+2025-05-04 13:29:13,030 INFO    MainThread:3157577 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 13:29:13,503 INFO    MainThread:3157577 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 13:29:14,946 INFO    MainThread:3157577 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 13:29:14,946 INFO    MainThread:3157577 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 13:29:14,946 INFO    MainThread:3157577 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 13:29:14,946 INFO    MainThread:3157577 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 13:29:14,954 INFO    MainThread:3157577 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 13:33:19,417 INFO    MainThread:3157577 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_13-33-08_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-04 15:58:23,607 WARNING MsgRouterThr:3157577 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb b/wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..d0fe86b920af1550de340c5d128c7edf489a6165
--- /dev/null
+++ b/wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71cf2569d2e508f45833ce35b1904bcc5325f9369eef0a76ea074fad88d8621d
+size 5615901
diff --git a/wandb/run-20250504_160615-f65jh2lv/files/output.log b/wandb/run-20250504_160615-f65jh2lv/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..9f8ad836e7976228186ebb3ee636e8e5558b4888
--- /dev/null
+++ b/wandb/run-20250504_160615-f65jh2lv/files/output.log
@@ -0,0 +1,8 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 511104/511104 [00:20<00:00, 25304.42 examples/s]
+Map: 100%|██████████| 109522/109522 [00:02<00:00, 36704.44 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 16:06:52,248] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+  1%|          | 246/47916 [02:12<7:08:44,  1.85it/s]
diff --git a/wandb/run-20250504_160615-f65jh2lv/files/requirements.txt b/wandb/run-20250504_160615-f65jh2lv/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_160615-f65jh2lv/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json b/wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd7b856b87ae9094e8c7410b93fc44a222546cc3
--- /dev/null
+++ b/wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T13:06:15.895027Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274886729728"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746623147",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027945",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746363947",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027945",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3178532",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log b/wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..618fc3f61177df7804f2fc4a8f211c7313be9c35
--- /dev/null
+++ b/wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log
@@ -0,0 +1,7 @@
+{"time":"2025-05-04T16:06:15.269316376+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp6sywt0mb/port-3178556.txt","pid":3178556,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T16:06:15.269366219+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T16:06:15.2702663+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3178556}
+{"time":"2025-05-04T16:06:15.270143057+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37579,"Zone":""}}
+{"time":"2025-05-04T16:06:15.448913658+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:49916"}
+{"time":"2025-05-04T16:06:15.898453126+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
+{"time":"2025-05-04T16:06:16.021719647+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
diff --git a/wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log b/wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..261eef09aa76e080a35f7789b3265f005f6d0225
--- /dev/null
+++ b/wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log
@@ -0,0 +1,8 @@
+{"time":"2025-05-04T16:06:15.899998659+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T16:06:15.900045512+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log"}
+{"time":"2025-05-04T16:06:16.021644692+03:00","level":"INFO","msg":"created new stream","id":"f65jh2lv"}
+{"time":"2025-05-04T16:06:16.021706945+03:00","level":"INFO","msg":"stream: started","id":"f65jh2lv"}
+{"time":"2025-05-04T16:06:16.021839756+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"f65jh2lv"}
+{"time":"2025-05-04T16:06:16.02194891+03:00","level":"INFO","msg":"handler: started","stream_id":"f65jh2lv"}
+{"time":"2025-05-04T16:06:16.022034888+03:00","level":"INFO","msg":"sender: started","stream_id":"f65jh2lv"}
+{"time":"2025-05-04T16:06:16.421916148+03:00","level":"INFO","msg":"Starting system monitor"}
diff --git a/wandb/run-20250504_160615-f65jh2lv/logs/debug.log b/wandb/run-20250504_160615-f65jh2lv/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..06dd2b2a7d6174fa397a32c411642f714082fa74
--- /dev/null
+++ b/wandb/run-20250504_160615-f65jh2lv/logs/debug.log
@@ -0,0 +1,26 @@
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Configure stats pid to 3178556
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:06:15,888 INFO    MainThread:3178556 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug.log
+2025-05-04 16:06:15,889 INFO    MainThread:3178556 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log
+2025-05-04 16:06:15,889 INFO    MainThread:3178556 [wandb_init.py:init():619] calling init triggers
+2025-05-04 16:06:15,889 INFO    MainThread:3178556 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 16:06:15,889 INFO    MainThread:3178556 [wandb_init.py:init():669] starting backend
+2025-05-04 16:06:15,889 INFO    MainThread:3178556 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 16:06:15,893 INFO    MainThread:3178556 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 16:06:15,894 INFO    MainThread:3178556 [wandb_init.py:init():686] backend started and connected
+2025-05-04 16:06:15,902 INFO    MainThread:3178556 [wandb_init.py:init():781] updated telemetry
+2025-05-04 16:06:15,905 INFO    MainThread:3178556 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 16:06:16,414 INFO    MainThread:3178556 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 16:06:17,992 INFO    MainThread:3178556 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 16:06:17,993 INFO    MainThread:3178556 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 16:06:17,993 INFO    MainThread:3178556 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 16:06:17,993 INFO    MainThread:3178556 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 16:06:18,004 INFO    MainThread:3178556 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 16:06:56,772 INFO    MainThread:3178556 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-06-46_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
diff --git a/wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb b/wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..1e8a5f9b3164571a503e4306a04be53481a4529e
Binary files /dev/null and b/wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb differ
diff --git a/wandb/run-20250504_160955-rqk2hbkf/files/config.yaml b/wandb/run-20250504_160955-rqk2hbkf/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7e7549dbe318b236ac4d168d1610ec259f3f67e0
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/files/config.yaml
@@ -0,0 +1,44 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m: []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 23
+                - 55
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "12": 0.18.7
+            "13": linux-x86_64
diff --git a/wandb/run-20250504_160955-rqk2hbkf/files/output.log b/wandb/run-20250504_160955-rqk2hbkf/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..54e81f72adc802bd17a6e8b3e973b2290acd5201
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/files/output.log
@@ -0,0 +1,24 @@
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
+    X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
+    return func(*args, **kwargs)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
+    arrays = indexable(*arrays)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
+    check_consistent_length(*result)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
+    raise ValueError(
+ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
+    X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
+    return func(*args, **kwargs)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
+    arrays = indexable(*arrays)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
+    check_consistent_length(*result)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
+    raise ValueError(
+ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
diff --git a/wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt b/wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json b/wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..96c1d759e7d3dd3d826e5a66a823a8a3f9265c9c
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T13:09:55.928947Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "272740364288"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746623370",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027946",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746364170",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027946",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3179106",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json b/wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d52051e315a7a21a9d9e5a40a517408bb086162
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":2}}
\ No newline at end of file
diff --git a/wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log b/wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..999d56f784a1c5621e4f166d8ed3d656b4110162
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T16:09:55.241065297+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmplpbc9pnb/port-3179132.txt","pid":3179132,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T16:09:55.241124751+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T16:09:55.241864+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37981,"Zone":""}}
+{"time":"2025-05-04T16:09:55.241967868+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3179132}
+{"time":"2025-05-04T16:09:55.428960455+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:55.928508592+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:56.056026556+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:58.597503038+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:58.597631333+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T16:09:58.597601675+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:58.597793186+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:59.528863432+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:59.528880642+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:40950"}
+{"time":"2025-05-04T16:09:59.528893164+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log b/wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..a63c5f27c7d8b0b1be30a0aa81b63cec47472ec9
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T16:09:55.930352223+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T16:09:55.930398642+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log"}
+{"time":"2025-05-04T16:09:56.055953645+03:00","level":"INFO","msg":"created new stream","id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:56.056013829+03:00","level":"INFO","msg":"stream: started","id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:56.056183059+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:56.056291373+03:00","level":"INFO","msg":"sender: started","stream_id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:56.056498843+03:00","level":"INFO","msg":"handler: started","stream_id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:56.455842701+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T16:09:58.597599181+03:00","level":"INFO","msg":"stream: closing","id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:58.597716873+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T16:09:58.598825235+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T16:09:58.792882763+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T16:09:58.792915401+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T16:09:58.792926694+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T16:09:59.286977407+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T16:09:59.528666057+03:00","level":"INFO","msg":"handler: closed","stream_id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:59.528710573+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:59.528726369+03:00","level":"INFO","msg":"sender: closed","stream_id":"rqk2hbkf"}
+{"time":"2025-05-04T16:09:59.528792264+03:00","level":"INFO","msg":"stream: closed","id":"rqk2hbkf"}
diff --git a/wandb/run-20250504_160955-rqk2hbkf/logs/debug.log b/wandb/run-20250504_160955-rqk2hbkf/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..bb9afff70b842f64258d1cce03b036f94b3b7f15
--- /dev/null
+++ b/wandb/run-20250504_160955-rqk2hbkf/logs/debug.log
@@ -0,0 +1,26 @@
+2025-05-04 16:09:55,914 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 16:09:55,915 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Configure stats pid to 3179132
+2025-05-04 16:09:55,915 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 16:09:55,915 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 16:09:55,915 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 16:09:55,915 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 16:09:55,915 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:09:55,916 INFO    MainThread:3179132 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:09:55,916 INFO    MainThread:3179132 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug.log
+2025-05-04 16:09:55,916 INFO    MainThread:3179132 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log
+2025-05-04 16:09:55,917 INFO    MainThread:3179132 [wandb_init.py:init():619] calling init triggers
+2025-05-04 16:09:55,917 INFO    MainThread:3179132 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 16:09:55,917 INFO    MainThread:3179132 [wandb_init.py:init():669] starting backend
+2025-05-04 16:09:55,917 INFO    MainThread:3179132 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 16:09:55,925 INFO    MainThread:3179132 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 16:09:55,927 INFO    MainThread:3179132 [wandb_init.py:init():686] backend started and connected
+2025-05-04 16:09:55,965 INFO    MainThread:3179132 [wandb_init.py:init():781] updated telemetry
+2025-05-04 16:09:55,969 INFO    MainThread:3179132 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 16:09:56,441 INFO    MainThread:3179132 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 16:09:57,857 INFO    MainThread:3179132 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 16:09:57,858 INFO    MainThread:3179132 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 16:09:57,859 INFO    MainThread:3179132 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 16:09:57,859 INFO    MainThread:3179132 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 16:09:57,874 INFO    MainThread:3179132 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 16:09:58,598 WARNING MsgRouterThr:3179132 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_160955-rqk2hbkf/run-rqk2hbkf.wandb b/wandb/run-20250504_160955-rqk2hbkf/run-rqk2hbkf.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..e9a6c7bc04fa77bdb7e2940e46071101d371b1d3
Binary files /dev/null and b/wandb/run-20250504_160955-rqk2hbkf/run-rqk2hbkf.wandb differ
diff --git a/wandb/run-20250504_161246-rdbtc2pz/files/config.yaml b/wandb/run-20250504_161246-rdbtc2pz/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..901a1d15058a51157e2bae9ec48a096a58e60825
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/files/config.yaml
@@ -0,0 +1,357 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m:
+            - "1": eval/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/runtime
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/steps_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/accuracy
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 6
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 7
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.7
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+auto_find_batch_size:
+    value: false
+batch_eval_metrics:
+    value: false
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_train:
+    value: false
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: null
+eval_strategy:
+    value: epoch
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: epoch
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+gradient_accumulation_steps:
+    value: 4
+gradient_checkpointing:
+    value: false
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+ignore_data_skip:
+    value: false
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+learning_rate:
+    value: 5e-05
+length_column_name:
+    value: length
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: t5-bc-out/runs/May04_16-12-52_kolyoz1
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 500
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+max_grad_norm:
+    value: 1
+max_steps:
+    value: -1
+metric_for_best_model:
+    value: loss
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_dir:
+    value: t5-bc-out
+overwrite_output_dir:
+    value: false
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 8
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+prediction_loss_only:
+    value: false
+push_to_hub:
+    value: false
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_unused_columns:
+    value: true
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+run_name:
+    value: t5-bc-out
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: false
+save_steps:
+    value: 500
+save_strategy:
+    value: epoch
+save_total_limit:
+    value: null
+seed:
+    value: 42
+skip_memory_metrics:
+    value: true
+split_batches:
+    value: null
+tf32:
+    value: null
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 0
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250504_161246-rdbtc2pz/files/output.log b/wandb/run-20250504_161246-rdbtc2pz/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..19a60f451615e772fad3d7c838cfbc32af90c5ca
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/files/output.log
@@ -0,0 +1,27 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 70/70 [00:00<00:00, 4499.50 examples/s]
+Map: 100%|██████████| 15/15 [00:00<00:00, 2515.68 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 16:12:57,595] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+100%|██████████| 6/6 [01:04<00:00, 10.71s/it]
+Map: 100%|██████████| 15/15 [00:00<00:00, 3408.53 examples/s]
+{'eval_loss': 0.2836913764476776, 'eval_accuracy': 1.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 179.205, 'eval_steps_per_second': 23.894, 'epoch': 0.89}
+{'eval_loss': 0.10505779087543488, 'eval_accuracy': 1.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 172.624, 'eval_steps_per_second': 23.017, 'epoch': 1.78}
+{'eval_loss': 0.05776570364832878, 'eval_accuracy': 1.0, 'eval_runtime': 0.1, 'eval_samples_per_second': 149.979, 'eval_steps_per_second': 19.997, 'epoch': 2.67}
+{'train_runtime': 64.2466, 'train_samples_per_second': 3.269, 'train_steps_per_second': 0.093, 'train_loss': 0.3210471471150716, 'epoch': 2.67}
+100%|██████████| 2/2 [00:00<00:00, 77.74it/s]
+{'eval_loss': 0.05800781771540642, 'eval_accuracy': 1.0, 'eval_runtime': 0.0642, 'eval_samples_per_second': 233.689, 'eval_steps_per_second': 31.158, 'epoch': 2.6666666666666665}
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    model.push_to_hub("isikz/prot_t5_binary_classifier")
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
+    raise AttributeError(
+AttributeError: 'T5BinaryClassifier' object has no attribute 'push_to_hub'
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    model.push_to_hub("isikz/prot_t5_binary_classifier")
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
+    raise AttributeError(
+AttributeError: 'T5BinaryClassifier' object has no attribute 'push_to_hub'
diff --git a/wandb/run-20250504_161246-rdbtc2pz/files/requirements.txt b/wandb/run-20250504_161246-rdbtc2pz/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_161246-rdbtc2pz/files/wandb-metadata.json b/wandb/run-20250504_161246-rdbtc2pz/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..d7d40733d28be0ffdf7ad38c1cd91cd7308f5fd5
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T13:12:46.058889Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274907410432"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746623540",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027947",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746364340",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027947",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3179500",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_161246-rdbtc2pz/files/wandb-summary.json b/wandb/run-20250504_161246-rdbtc2pz/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..ca1304c1a6f3387bb206e3ac8c9bfa50dd878c77
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/files/wandb-summary.json
@@ -0,0 +1 @@
+{"train_loss":0.3210471471150716,"_runtime":80.142129451,"train_runtime":64.2466,"eval/loss":0.05800781771540642,"eval/steps_per_second":31.158,"total_flos":0,"eval/samples_per_second":233.689,"train/global_step":6,"_timestamp":1.746364446200474e+09,"train_samples_per_second":3.269,"_wandb":{"runtime":80},"eval/runtime":0.0642,"train_steps_per_second":0.093,"train/epoch":2.6666666666666665,"eval/accuracy":1,"_step":4}
\ No newline at end of file
diff --git a/wandb/run-20250504_161246-rdbtc2pz/logs/debug-core.log b/wandb/run-20250504_161246-rdbtc2pz/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..2233811df4108abf9e6d1a7a308e7fd9f315ac85
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T16:12:45.059197409+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmphflqkva1/port-3179526.txt","pid":3179526,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T16:12:45.059250836+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T16:12:45.060076988+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3179526}
+{"time":"2025-05-04T16:12:45.059982306+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45921,"Zone":""}}
+{"time":"2025-05-04T16:12:45.246915089+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:12:46.063164622+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"rdbtc2pz","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:12:46.187062148+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"rdbtc2pz","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:14:06.269673416+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:14:06.269788395+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:14:06.26984398+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T16:14:06.269980058+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:14:07.608460726+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:14:07.608482723+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:33132"}
+{"time":"2025-05-04T16:14:07.60849804+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_161246-rdbtc2pz/logs/debug-internal.log b/wandb/run-20250504_161246-rdbtc2pz/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..2f9c168a41928938cfb3d4e4e4131691e000328e
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T16:12:46.065859772+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T16:12:46.065909143+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_161246-rdbtc2pz/logs/debug-core.log"}
+{"time":"2025-05-04T16:12:46.186999454+03:00","level":"INFO","msg":"created new stream","id":"rdbtc2pz"}
+{"time":"2025-05-04T16:12:46.187050012+03:00","level":"INFO","msg":"stream: started","id":"rdbtc2pz"}
+{"time":"2025-05-04T16:12:46.187228889+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"rdbtc2pz"}
+{"time":"2025-05-04T16:12:46.187328701+03:00","level":"INFO","msg":"handler: started","stream_id":"rdbtc2pz"}
+{"time":"2025-05-04T16:12:46.187417103+03:00","level":"INFO","msg":"sender: started","stream_id":"rdbtc2pz"}
+{"time":"2025-05-04T16:12:46.598141294+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T16:14:06.269782406+03:00","level":"INFO","msg":"stream: closing","id":"rdbtc2pz"}
+{"time":"2025-05-04T16:14:06.269825637+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T16:14:06.270879471+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T16:14:06.55541099+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T16:14:06.555433954+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T16:14:06.555445965+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T16:14:07.09767572+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T16:14:07.607443104+03:00","level":"INFO","msg":"handler: closed","stream_id":"rdbtc2pz"}
+{"time":"2025-05-04T16:14:07.607487355+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rdbtc2pz"}
+{"time":"2025-05-04T16:14:07.607532609+03:00","level":"INFO","msg":"sender: closed","stream_id":"rdbtc2pz"}
+{"time":"2025-05-04T16:14:07.607587557+03:00","level":"INFO","msg":"stream: closed","id":"rdbtc2pz"}
diff --git a/wandb/run-20250504_161246-rdbtc2pz/logs/debug.log b/wandb/run-20250504_161246-rdbtc2pz/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..349cfbd59b697951167c42dd519765d328645a03
--- /dev/null
+++ b/wandb/run-20250504_161246-rdbtc2pz/logs/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 16:12:46,051 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Configure stats pid to 3179526
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:12:46,052 INFO    MainThread:3179526 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_161246-rdbtc2pz/logs/debug.log
+2025-05-04 16:12:46,053 INFO    MainThread:3179526 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_161246-rdbtc2pz/logs/debug-internal.log
+2025-05-04 16:12:46,053 INFO    MainThread:3179526 [wandb_init.py:init():619] calling init triggers
+2025-05-04 16:12:46,053 INFO    MainThread:3179526 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 16:12:46,053 INFO    MainThread:3179526 [wandb_init.py:init():669] starting backend
+2025-05-04 16:12:46,053 INFO    MainThread:3179526 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 16:12:46,057 INFO    MainThread:3179526 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 16:12:46,058 INFO    MainThread:3179526 [wandb_init.py:init():686] backend started and connected
+2025-05-04 16:12:46,064 INFO    MainThread:3179526 [wandb_init.py:init():781] updated telemetry
+2025-05-04 16:12:46,067 INFO    MainThread:3179526 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 16:12:46,584 INFO    MainThread:3179526 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 16:12:47,966 INFO    MainThread:3179526 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 16:12:47,966 INFO    MainThread:3179526 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 16:12:47,966 INFO    MainThread:3179526 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 16:12:47,966 INFO    MainThread:3179526 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 16:12:47,974 INFO    MainThread:3179526 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 16:13:01,857 INFO    MainThread:3179526 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-12-52_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-04 16:14:06,270 WARNING MsgRouterThr:3179526 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_161246-rdbtc2pz/run-rdbtc2pz.wandb b/wandb/run-20250504_161246-rdbtc2pz/run-rdbtc2pz.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..f57225fe3777b25bea8c60ee43eed186f1df565a
Binary files /dev/null and b/wandb/run-20250504_161246-rdbtc2pz/run-rdbtc2pz.wandb differ
diff --git a/wandb/run-20250504_162343-cp870jym/files/config.yaml b/wandb/run-20250504_162343-cp870jym/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..18d70a8edeb099baad34583edef28bf48cb9585b
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/files/config.yaml
@@ -0,0 +1,357 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m:
+            - "1": eval/steps_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/accuracy
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/runtime
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 6
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 7
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.7
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+auto_find_batch_size:
+    value: false
+batch_eval_metrics:
+    value: false
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_train:
+    value: false
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: null
+eval_strategy:
+    value: epoch
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: epoch
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+gradient_accumulation_steps:
+    value: 4
+gradient_checkpointing:
+    value: false
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+ignore_data_skip:
+    value: false
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+learning_rate:
+    value: 5e-05
+length_column_name:
+    value: length
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: t5-bc-out/runs/May04_16-23-49_kolyoz1
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 500
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+max_grad_norm:
+    value: 1
+max_steps:
+    value: -1
+metric_for_best_model:
+    value: loss
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_dir:
+    value: t5-bc-out
+overwrite_output_dir:
+    value: false
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 8
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+prediction_loss_only:
+    value: false
+push_to_hub:
+    value: false
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_unused_columns:
+    value: true
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+run_name:
+    value: t5-bc-out
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: false
+save_steps:
+    value: 500
+save_strategy:
+    value: epoch
+save_total_limit:
+    value: null
+seed:
+    value: 42
+skip_memory_metrics:
+    value: true
+split_batches:
+    value: null
+tf32:
+    value: null
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 0
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250504_162343-cp870jym/files/output.log b/wandb/run-20250504_162343-cp870jym/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..37d4dd8bc142bc7bd0e5821b6fd7fc2418f4768a
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/files/output.log
@@ -0,0 +1,27 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 70/70 [00:00<00:00, 4479.59 examples/s]
+Map: 100%|██████████| 15/15 [00:00<00:00, 2556.26 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 16:23:55,053] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+100%|██████████| 6/6 [01:08<00:00, 11.47s/it]
+Map: 100%|██████████| 15/15 [00:00<00:00, 3414.44 examples/s]
+{'eval_loss': 0.32496747374534607, 'eval_accuracy': 1.0, 'eval_runtime': 0.0946, 'eval_samples_per_second': 158.536, 'eval_steps_per_second': 21.138, 'epoch': 0.89}
+{'eval_loss': 0.14126792550086975, 'eval_accuracy': 1.0, 'eval_runtime': 0.0935, 'eval_samples_per_second': 160.347, 'eval_steps_per_second': 21.38, 'epoch': 1.78}
+{'eval_loss': 0.08305665105581284, 'eval_accuracy': 1.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 172.874, 'eval_steps_per_second': 23.05, 'epoch': 2.67}
+{'train_runtime': 68.815, 'train_samples_per_second': 3.052, 'train_steps_per_second': 0.087, 'train_loss': 0.34361688296000165, 'epoch': 2.67}
+100%|██████████| 2/2 [00:00<00:00, 93.00it/s]
+{'eval_loss': 0.07820229977369308, 'eval_accuracy': 1.0, 'eval_runtime': 0.0516, 'eval_samples_per_second': 290.667, 'eval_steps_per_second': 38.756, 'epoch': 2.6666666666666665}
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    model.save_pretrained(
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
+    raise AttributeError(
+AttributeError: 'T5BinaryClassifier' object has no attribute 'save_pretrained'
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    model.save_pretrained(
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1928, in __getattr__
+    raise AttributeError(
+AttributeError: 'T5BinaryClassifier' object has no attribute 'save_pretrained'
diff --git a/wandb/run-20250504_162343-cp870jym/files/requirements.txt b/wandb/run-20250504_162343-cp870jym/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_162343-cp870jym/files/wandb-metadata.json b/wandb/run-20250504_162343-cp870jym/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..e5bd8ffd94d157531e5b2d7abc7c46e50d9074ff
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T13:23:43.746737Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274884100096"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746624198",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027950",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746364998",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027950",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3180708",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_162343-cp870jym/files/wandb-summary.json b/wandb/run-20250504_162343-cp870jym/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9e34e6f68682beece2671803867da11fe15a3c5
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_step":4,"_runtime":84.708140457,"train_runtime":68.815,"eval/runtime":0.0516,"_wandb":{"runtime":84},"train_samples_per_second":3.052,"train/epoch":2.6666666666666665,"eval/loss":0.07820229977369308,"train_loss":0.34361688296000165,"total_flos":0,"_timestamp":1.7463651084544086e+09,"eval/samples_per_second":290.667,"eval/accuracy":1,"train_steps_per_second":0.087,"train/global_step":6,"eval/steps_per_second":38.756}
\ No newline at end of file
diff --git a/wandb/run-20250504_162343-cp870jym/logs/debug-core.log b/wandb/run-20250504_162343-cp870jym/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..1e2f80d79d66ceb2fd9940e2b195bd656cbe50a6
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T16:23:43.103970405+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpgvzpqnd2/port-3180737.txt","pid":3180737,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T16:23:43.104018+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T16:23:43.104795371+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39787,"Zone":""}}
+{"time":"2025-05-04T16:23:43.104898929+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3180737}
+{"time":"2025-05-04T16:23:43.291758092+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:23:43.748521574+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"cp870jym","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:23:43.873512977+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cp870jym","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:25:08.531174232+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:25:08.531307956+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:25:08.531367815+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T16:25:08.53150429+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:25:09.788149247+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:25:09.788183611+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:38582"}
+{"time":"2025-05-04T16:25:09.788206528+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_162343-cp870jym/logs/debug-internal.log b/wandb/run-20250504_162343-cp870jym/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..1625047ba9735e4a64500c381492219a264c5df4
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T16:23:43.750249064+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T16:23:43.750294337+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_162343-cp870jym/logs/debug-core.log"}
+{"time":"2025-05-04T16:23:43.873441585+03:00","level":"INFO","msg":"created new stream","id":"cp870jym"}
+{"time":"2025-05-04T16:23:43.873500609+03:00","level":"INFO","msg":"stream: started","id":"cp870jym"}
+{"time":"2025-05-04T16:23:43.873652279+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"cp870jym"}
+{"time":"2025-05-04T16:23:43.873745942+03:00","level":"INFO","msg":"handler: started","stream_id":"cp870jym"}
+{"time":"2025-05-04T16:23:43.873943316+03:00","level":"INFO","msg":"sender: started","stream_id":"cp870jym"}
+{"time":"2025-05-04T16:23:44.451037367+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T16:25:08.531294356+03:00","level":"INFO","msg":"stream: closing","id":"cp870jym"}
+{"time":"2025-05-04T16:25:08.531341197+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T16:25:08.532383047+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T16:25:08.797985156+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T16:25:08.798011707+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T16:25:08.798022316+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T16:25:09.301751579+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T16:25:09.787364957+03:00","level":"INFO","msg":"handler: closed","stream_id":"cp870jym"}
+{"time":"2025-05-04T16:25:09.787438823+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"cp870jym"}
+{"time":"2025-05-04T16:25:09.78745243+03:00","level":"INFO","msg":"sender: closed","stream_id":"cp870jym"}
+{"time":"2025-05-04T16:25:09.787535096+03:00","level":"INFO","msg":"stream: closed","id":"cp870jym"}
diff --git a/wandb/run-20250504_162343-cp870jym/logs/debug.log b/wandb/run-20250504_162343-cp870jym/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..a5def13ec74b1339547213aa42656f477507efbb
--- /dev/null
+++ b/wandb/run-20250504_162343-cp870jym/logs/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 16:23:43,738 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 16:23:43,738 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Configure stats pid to 3180737
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_162343-cp870jym/logs/debug.log
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_162343-cp870jym/logs/debug-internal.log
+2025-05-04 16:23:43,739 INFO    MainThread:3180737 [wandb_init.py:init():619] calling init triggers
+2025-05-04 16:23:43,740 INFO    MainThread:3180737 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 16:23:43,740 INFO    MainThread:3180737 [wandb_init.py:init():669] starting backend
+2025-05-04 16:23:43,740 INFO    MainThread:3180737 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 16:23:43,745 INFO    MainThread:3180737 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 16:23:43,746 INFO    MainThread:3180737 [wandb_init.py:init():686] backend started and connected
+2025-05-04 16:23:43,754 INFO    MainThread:3180737 [wandb_init.py:init():781] updated telemetry
+2025-05-04 16:23:43,757 INFO    MainThread:3180737 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 16:23:44,437 INFO    MainThread:3180737 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 16:23:45,830 INFO    MainThread:3180737 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 16:23:45,831 INFO    MainThread:3180737 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 16:23:45,831 INFO    MainThread:3180737 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 16:23:45,832 INFO    MainThread:3180737 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 16:23:45,842 INFO    MainThread:3180737 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 16:23:59,567 INFO    MainThread:3180737 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-23-49_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-04 16:25:08,531 WARNING MsgRouterThr:3180737 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_162343-cp870jym/run-cp870jym.wandb b/wandb/run-20250504_162343-cp870jym/run-cp870jym.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..948b5b7c23ce67858ab71bf4a99f3cfd510cbee6
Binary files /dev/null and b/wandb/run-20250504_162343-cp870jym/run-cp870jym.wandb differ
diff --git a/wandb/run-20250504_162813-vqs6o6w5/files/config.yaml b/wandb/run-20250504_162813-vqs6o6w5/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2e371277d6ebafd880eb06c8f0ef37b936a3a706
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/files/config.yaml
@@ -0,0 +1,357 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m:
+            - "1": eval/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/runtime
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/steps_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/accuracy
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 6
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 7
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.7
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+auto_find_batch_size:
+    value: false
+batch_eval_metrics:
+    value: false
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_train:
+    value: false
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: null
+eval_strategy:
+    value: epoch
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: epoch
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+gradient_accumulation_steps:
+    value: 4
+gradient_checkpointing:
+    value: false
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+ignore_data_skip:
+    value: false
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+learning_rate:
+    value: 5e-05
+length_column_name:
+    value: length
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: t5-bc-out/runs/May04_16-28-19_kolyoz1
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 500
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+max_grad_norm:
+    value: 1
+max_steps:
+    value: -1
+metric_for_best_model:
+    value: loss
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_dir:
+    value: t5-bc-out
+overwrite_output_dir:
+    value: false
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 8
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+prediction_loss_only:
+    value: false
+push_to_hub:
+    value: false
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_unused_columns:
+    value: true
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+run_name:
+    value: t5-bc-out
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: false
+save_steps:
+    value: 500
+save_strategy:
+    value: epoch
+save_total_limit:
+    value: null
+seed:
+    value: 42
+skip_memory_metrics:
+    value: true
+split_batches:
+    value: null
+tf32:
+    value: null
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 0
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250504_162813-vqs6o6w5/files/output.log b/wandb/run-20250504_162813-vqs6o6w5/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..a8998b57d109ed7312a9c2e3f41928ec6b4905de
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/files/output.log
@@ -0,0 +1,23 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 70/70 [00:00<00:00, 4467.73 examples/s]
+Map: 100%|██████████| 15/15 [00:00<00:00, 2557.19 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 16:28:25,523] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+100%|██████████| 6/6 [01:06<00:00, 11.03s/it]
+Map: 100%|██████████| 15/15 [00:00<00:00, 3353.30 examples/s]
+{'eval_loss': 0.23444823920726776, 'eval_accuracy': 1.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 178.164, 'eval_steps_per_second': 23.755, 'epoch': 0.89}
+{'eval_loss': 0.08114013075828552, 'eval_accuracy': 1.0, 'eval_runtime': 0.0928, 'eval_samples_per_second': 161.657, 'eval_steps_per_second': 21.554, 'epoch': 1.78}
+{'eval_loss': 0.0510762594640255, 'eval_accuracy': 1.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 190.397, 'eval_steps_per_second': 25.386, 'epoch': 2.67}
+{'train_runtime': 66.2064, 'train_samples_per_second': 3.172, 'train_steps_per_second': 0.091, 'train_loss': 0.281462828318278, 'epoch': 2.67}
+100%|██████████| 2/2 [00:00<00:00, 90.79it/s]
+{'eval_loss': 0.046335864812135696, 'eval_accuracy': 1.0, 'eval_runtime': 0.0528, 'eval_samples_per_second': 284.031, 'eval_steps_per_second': 37.871, 'epoch': 2.6666666666666665}
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    trainer.save_model(
+TypeError: Trainer.save_model() got an unexpected keyword argument 'safe_serialization'
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    trainer.save_model(
+TypeError: Trainer.save_model() got an unexpected keyword argument 'safe_serialization'
diff --git a/wandb/run-20250504_162813-vqs6o6w5/files/requirements.txt b/wandb/run-20250504_162813-vqs6o6w5/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_162813-vqs6o6w5/files/wandb-metadata.json b/wandb/run-20250504_162813-vqs6o6w5/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..4cdef3cdbd7a79ab90f8929362f8e799e456f186
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T13:28:13.563930Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274899660800"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746624467",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027952",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746365267",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027952",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3182008",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_162813-vqs6o6w5/files/wandb-summary.json b/wandb/run-20250504_162813-vqs6o6w5/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..35b5e4df9660cdaaebd8d4bd033fa5210a1d5fcd
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/files/wandb-summary.json
@@ -0,0 +1 @@
+{"train_steps_per_second":0.091,"eval/samples_per_second":284.031,"_step":4,"eval/runtime":0.0528,"eval/loss":0.046335864812135696,"total_flos":0,"train_runtime":66.2064,"train_loss":0.281462828318278,"eval/accuracy":1,"_timestamp":1.746365376058223e+09,"_runtime":82.494806798,"train/epoch":2.6666666666666665,"train_samples_per_second":3.172,"_wandb":{"runtime":82},"eval/steps_per_second":37.871,"train/global_step":6}
\ No newline at end of file
diff --git a/wandb/run-20250504_162813-vqs6o6w5/logs/debug-core.log b/wandb/run-20250504_162813-vqs6o6w5/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..ed0d11f5576631e8906b885ebeffe69cd840f942
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T16:28:12.92389089+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpsu_1e075/port-3182035.txt","pid":3182035,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T16:28:12.923946336+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T16:28:12.924930159+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3182035}
+{"time":"2025-05-04T16:28:12.924790098+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":40313,"Zone":""}}
+{"time":"2025-05-04T16:28:13.10973957+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:28:13.567567491+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"vqs6o6w5","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:28:13.69241432+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"vqs6o6w5","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:29:36.127793865+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:29:36.127929839+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T16:29:36.127907923+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:29:36.128125509+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:29:37.558291716+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:29:37.558314076+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:59452"}
+{"time":"2025-05-04T16:29:37.558329488+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_162813-vqs6o6w5/logs/debug-internal.log b/wandb/run-20250504_162813-vqs6o6w5/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..92f3f80ba91679919e80e56dd8abd8e950015b71
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T16:28:13.569618821+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T16:28:13.56966556+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_162813-vqs6o6w5/logs/debug-core.log"}
+{"time":"2025-05-04T16:28:13.692347406+03:00","level":"INFO","msg":"created new stream","id":"vqs6o6w5"}
+{"time":"2025-05-04T16:28:13.692401835+03:00","level":"INFO","msg":"stream: started","id":"vqs6o6w5"}
+{"time":"2025-05-04T16:28:13.692589976+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"vqs6o6w5"}
+{"time":"2025-05-04T16:28:13.69268191+03:00","level":"INFO","msg":"handler: started","stream_id":"vqs6o6w5"}
+{"time":"2025-05-04T16:28:13.692686366+03:00","level":"INFO","msg":"sender: started","stream_id":"vqs6o6w5"}
+{"time":"2025-05-04T16:28:14.077830252+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T16:29:36.127909239+03:00","level":"INFO","msg":"stream: closing","id":"vqs6o6w5"}
+{"time":"2025-05-04T16:29:36.127953372+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T16:29:36.129135049+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T16:29:36.381385986+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T16:29:36.381410641+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T16:29:36.381421107+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T16:29:36.890657991+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T16:29:37.557157658+03:00","level":"INFO","msg":"handler: closed","stream_id":"vqs6o6w5"}
+{"time":"2025-05-04T16:29:37.55721188+03:00","level":"INFO","msg":"sender: closed","stream_id":"vqs6o6w5"}
+{"time":"2025-05-04T16:29:37.557201882+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"vqs6o6w5"}
+{"time":"2025-05-04T16:29:37.557304847+03:00","level":"INFO","msg":"stream: closed","id":"vqs6o6w5"}
diff --git a/wandb/run-20250504_162813-vqs6o6w5/logs/debug.log b/wandb/run-20250504_162813-vqs6o6w5/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..0fb69f8e411995c37137af59a45087557d8c1802
--- /dev/null
+++ b/wandb/run-20250504_162813-vqs6o6w5/logs/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 16:28:13,556 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Configure stats pid to 3182035
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:28:13,557 INFO    MainThread:3182035 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_162813-vqs6o6w5/logs/debug.log
+2025-05-04 16:28:13,558 INFO    MainThread:3182035 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_162813-vqs6o6w5/logs/debug-internal.log
+2025-05-04 16:28:13,558 INFO    MainThread:3182035 [wandb_init.py:init():619] calling init triggers
+2025-05-04 16:28:13,558 INFO    MainThread:3182035 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 16:28:13,558 INFO    MainThread:3182035 [wandb_init.py:init():669] starting backend
+2025-05-04 16:28:13,558 INFO    MainThread:3182035 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 16:28:13,562 INFO    MainThread:3182035 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 16:28:13,563 INFO    MainThread:3182035 [wandb_init.py:init():686] backend started and connected
+2025-05-04 16:28:13,569 INFO    MainThread:3182035 [wandb_init.py:init():781] updated telemetry
+2025-05-04 16:28:13,572 INFO    MainThread:3182035 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 16:28:14,063 INFO    MainThread:3182035 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 16:28:15,452 INFO    MainThread:3182035 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 16:28:15,452 INFO    MainThread:3182035 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 16:28:15,452 INFO    MainThread:3182035 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 16:28:15,452 INFO    MainThread:3182035 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 16:28:15,461 INFO    MainThread:3182035 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 16:28:29,768 INFO    MainThread:3182035 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-28-19_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-04 16:29:36,128 WARNING MsgRouterThr:3182035 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_162813-vqs6o6w5/run-vqs6o6w5.wandb b/wandb/run-20250504_162813-vqs6o6w5/run-vqs6o6w5.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..a738d1c054d6cee9c4fee21d6554ff8c952d6250
Binary files /dev/null and b/wandb/run-20250504_162813-vqs6o6w5/run-vqs6o6w5.wandb differ
diff --git a/wandb/run-20250504_163202-a8cxeqmf/files/config.yaml b/wandb/run-20250504_163202-a8cxeqmf/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b134dd5782bfb1efdf79a02c28eb4463bbe598a7
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/files/config.yaml
@@ -0,0 +1,357 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m:
+            - "1": eval/accuracy
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/runtime
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/steps_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 6
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 7
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.7
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+auto_find_batch_size:
+    value: false
+batch_eval_metrics:
+    value: false
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_train:
+    value: false
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: null
+eval_strategy:
+    value: epoch
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: epoch
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+gradient_accumulation_steps:
+    value: 4
+gradient_checkpointing:
+    value: false
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+ignore_data_skip:
+    value: false
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+learning_rate:
+    value: 5e-05
+length_column_name:
+    value: length
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: t5-bc-out/runs/May04_16-32-08_kolyoz1
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 500
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+max_grad_norm:
+    value: 1
+max_steps:
+    value: -1
+metric_for_best_model:
+    value: loss
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_dir:
+    value: t5-bc-out
+overwrite_output_dir:
+    value: false
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 8
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+prediction_loss_only:
+    value: false
+push_to_hub:
+    value: false
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_unused_columns:
+    value: true
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+run_name:
+    value: t5-bc-out
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: false
+save_steps:
+    value: 500
+save_strategy:
+    value: epoch
+save_total_limit:
+    value: null
+seed:
+    value: 42
+skip_memory_metrics:
+    value: true
+split_batches:
+    value: null
+tf32:
+    value: null
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 0
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250504_163202-a8cxeqmf/files/output.log b/wandb/run-20250504_163202-a8cxeqmf/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..3df36c5de543befcbdc45eb6aa9b4dd90fcb1682
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/files/output.log
@@ -0,0 +1,35 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 70/70 [00:00<00:00, 4245.80 examples/s]
+Map: 100%|██████████| 15/15 [00:00<00:00, 2515.98 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 16:32:13,990] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+100%|██████████| 6/6 [01:07<00:00, 11.21s/it]
+Map: 100%|██████████| 15/15 [00:00<00:00, 3498.75 examples/s]
+{'eval_loss': 0.28029781579971313, 'eval_accuracy': 1.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 180.04, 'eval_steps_per_second': 24.005, 'epoch': 0.89}
+{'eval_loss': 0.1000773161649704, 'eval_accuracy': 1.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 173.864, 'eval_steps_per_second': 23.182, 'epoch': 1.78}
+{'eval_loss': 0.05684203654527664, 'eval_accuracy': 1.0, 'eval_runtime': 0.0937, 'eval_samples_per_second': 160.033, 'eval_steps_per_second': 21.338, 'epoch': 2.67}
+{'train_runtime': 67.2983, 'train_samples_per_second': 3.12, 'train_steps_per_second': 0.089, 'train_loss': 0.31141672531763714, 'epoch': 2.67}
+100%|██████████| 2/2 [00:00<00:00, 101.26it/s]
+{'eval_loss': 0.04954631254076958, 'eval_accuracy': 1.0, 'eval_runtime': 0.0471, 'eval_samples_per_second': 318.692, 'eval_steps_per_second': 42.492, 'epoch': 2.6666666666666665}
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    trainer.save_model(
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
+    self._save(output_dir)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3704, in _save
+    os.makedirs(output_dir, exist_ok=True)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/os.py", line 225, in makedirs
+    mkdir(name, mode)
+PermissionError: [Errno 13] Permission denied: '/prott5_bc_ft'
+Traceback (most recent call last):
+  File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 141, in <module>
+    trainer.save_model(
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
+    self._save(output_dir)
+  File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3704, in _save
+    os.makedirs(output_dir, exist_ok=True)
+  File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/os.py", line 225, in makedirs
+    mkdir(name, mode)
+PermissionError: [Errno 13] Permission denied: '/prott5_bc_ft'
diff --git a/wandb/run-20250504_163202-a8cxeqmf/files/requirements.txt b/wandb/run-20250504_163202-a8cxeqmf/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_163202-a8cxeqmf/files/wandb-metadata.json b/wandb/run-20250504_163202-a8cxeqmf/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..43011783c61f4ffb7c1e598cc8f3542d15c2ce5a
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T13:32:02.055600Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274920230912"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746624696",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027955",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746365496",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027955",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3182736",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_163202-a8cxeqmf/files/wandb-summary.json b/wandb/run-20250504_163202-a8cxeqmf/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..ef3a5d0d977ffd8ab00ee2541f1ee061080e14a6
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/files/wandb-summary.json
@@ -0,0 +1 @@
+{"_wandb":{"runtime":83},"train_loss":0.31141672531763714,"train_steps_per_second":0.089,"eval/accuracy":1,"eval/steps_per_second":42.492,"train_samples_per_second":3.12,"eval/samples_per_second":318.692,"train/global_step":6,"train_runtime":67.2983,"_runtime":83.580626717,"_timestamp":1.746365605635744e+09,"eval/loss":0.04954631254076958,"train/epoch":2.6666666666666665,"total_flos":0,"eval/runtime":0.0471,"_step":4}
\ No newline at end of file
diff --git a/wandb/run-20250504_163202-a8cxeqmf/logs/debug-core.log b/wandb/run-20250504_163202-a8cxeqmf/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..4d5cf6f58a5c9fc81b4569f485360bbc66a4434a
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T16:32:01.418393778+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmplvhvsc_q/port-3182760.txt","pid":3182760,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T16:32:01.418441665+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T16:32:01.419205535+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45799,"Zone":""}}
+{"time":"2025-05-04T16:32:01.419534072+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3182760}
+{"time":"2025-05-04T16:32:01.606152917+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:32:02.057688618+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"a8cxeqmf","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:32:02.186607102+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"a8cxeqmf","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:33:25.702060103+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:33:25.702177617+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:33:25.702273436+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T16:33:25.702373794+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:33:26.974600306+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:33:26.974618713+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48576"}
+{"time":"2025-05-04T16:33:26.974630492+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_163202-a8cxeqmf/logs/debug-internal.log b/wandb/run-20250504_163202-a8cxeqmf/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..ebf1e141889b4f95835db70d0596ae304d399be9
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T16:32:02.059376166+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T16:32:02.059422726+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_163202-a8cxeqmf/logs/debug-core.log"}
+{"time":"2025-05-04T16:32:02.18652874+03:00","level":"INFO","msg":"created new stream","id":"a8cxeqmf"}
+{"time":"2025-05-04T16:32:02.186595094+03:00","level":"INFO","msg":"stream: started","id":"a8cxeqmf"}
+{"time":"2025-05-04T16:32:02.18671057+03:00","level":"INFO","msg":"handler: started","stream_id":"a8cxeqmf"}
+{"time":"2025-05-04T16:32:02.186759328+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"a8cxeqmf"}
+{"time":"2025-05-04T16:32:02.186873015+03:00","level":"INFO","msg":"sender: started","stream_id":"a8cxeqmf"}
+{"time":"2025-05-04T16:32:02.609103171+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T16:33:25.702185181+03:00","level":"INFO","msg":"stream: closing","id":"a8cxeqmf"}
+{"time":"2025-05-04T16:33:25.702250772+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T16:33:25.703241445+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T16:33:25.984446677+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T16:33:25.98447338+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T16:33:25.984484498+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T16:33:26.497866306+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T16:33:26.973748978+03:00","level":"INFO","msg":"handler: closed","stream_id":"a8cxeqmf"}
+{"time":"2025-05-04T16:33:26.973797312+03:00","level":"INFO","msg":"sender: closed","stream_id":"a8cxeqmf"}
+{"time":"2025-05-04T16:33:26.973781655+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"a8cxeqmf"}
+{"time":"2025-05-04T16:33:26.973934766+03:00","level":"INFO","msg":"stream: closed","id":"a8cxeqmf"}
diff --git a/wandb/run-20250504_163202-a8cxeqmf/logs/debug.log b/wandb/run-20250504_163202-a8cxeqmf/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..2d0d3a8a5a0e5c625f7eb9466c7bbde46220ad7b
--- /dev/null
+++ b/wandb/run-20250504_163202-a8cxeqmf/logs/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Configure stats pid to 3182760
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:32:02,049 INFO    MainThread:3182760 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_163202-a8cxeqmf/logs/debug.log
+2025-05-04 16:32:02,050 INFO    MainThread:3182760 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_163202-a8cxeqmf/logs/debug-internal.log
+2025-05-04 16:32:02,050 INFO    MainThread:3182760 [wandb_init.py:init():619] calling init triggers
+2025-05-04 16:32:02,050 INFO    MainThread:3182760 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 16:32:02,050 INFO    MainThread:3182760 [wandb_init.py:init():669] starting backend
+2025-05-04 16:32:02,050 INFO    MainThread:3182760 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 16:32:02,054 INFO    MainThread:3182760 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 16:32:02,055 INFO    MainThread:3182760 [wandb_init.py:init():686] backend started and connected
+2025-05-04 16:32:02,063 INFO    MainThread:3182760 [wandb_init.py:init():781] updated telemetry
+2025-05-04 16:32:02,066 INFO    MainThread:3182760 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 16:32:02,595 INFO    MainThread:3182760 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 16:32:03,942 INFO    MainThread:3182760 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 16:32:03,942 INFO    MainThread:3182760 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 16:32:03,942 INFO    MainThread:3182760 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 16:32:03,943 INFO    MainThread:3182760 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 16:32:03,951 INFO    MainThread:3182760 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 16:32:18,271 INFO    MainThread:3182760 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-32-08_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-04 16:33:25,702 WARNING MsgRouterThr:3182760 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_163202-a8cxeqmf/run-a8cxeqmf.wandb b/wandb/run-20250504_163202-a8cxeqmf/run-a8cxeqmf.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..6a806abc084b97c4e0ea4f3b996341285fb7d3e9
Binary files /dev/null and b/wandb/run-20250504_163202-a8cxeqmf/run-a8cxeqmf.wandb differ
diff --git a/wandb/run-20250504_163644-j17n0z1w/files/config.yaml b/wandb/run-20250504_163644-j17n0z1w/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..47672e8d6eabcdaeb89962fefba89c107dc6703e
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/files/config.yaml
@@ -0,0 +1,357 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/accuracy
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/steps_per_second
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/loss
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/runtime
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 6
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 7
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.7
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+auto_find_batch_size:
+    value: false
+batch_eval_metrics:
+    value: false
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_train:
+    value: false
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: null
+eval_strategy:
+    value: epoch
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: epoch
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+gradient_accumulation_steps:
+    value: 4
+gradient_checkpointing:
+    value: false
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+ignore_data_skip:
+    value: false
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+learning_rate:
+    value: 5e-05
+length_column_name:
+    value: length
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: t5-bc-out/runs/May04_16-36-51_kolyoz1
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 500
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+max_grad_norm:
+    value: 1
+max_steps:
+    value: -1
+metric_for_best_model:
+    value: loss
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_dir:
+    value: t5-bc-out
+overwrite_output_dir:
+    value: false
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 8
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+prediction_loss_only:
+    value: false
+push_to_hub:
+    value: false
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_unused_columns:
+    value: true
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+run_name:
+    value: t5-bc-out
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: false
+save_steps:
+    value: 500
+save_strategy:
+    value: epoch
+save_total_limit:
+    value: null
+seed:
+    value: 42
+skip_memory_metrics:
+    value: true
+split_batches:
+    value: null
+tf32:
+    value: null
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 0
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250504_163644-j17n0z1w/files/output.log b/wandb/run-20250504_163644-j17n0z1w/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..789e4c60cde818136ebb5c5f55d9196d2dbffb5e
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/files/output.log
@@ -0,0 +1,15 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 70/70 [00:00<00:00, 6893.99 examples/s]
+Map: 100%|██████████| 15/15 [00:00<00:00, 3422.06 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 16:36:56,534] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+100%|██████████| 6/6 [01:09<00:00, 11.54s/it]
+Map: 100%|██████████| 15/15 [00:00<00:00, 3398.03 examples/s]
+{'eval_loss': 0.30133458971977234, 'eval_accuracy': 1.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 178.602, 'eval_steps_per_second': 23.814, 'epoch': 0.89}
+{'eval_loss': 0.14025470614433289, 'eval_accuracy': 1.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 166.815, 'eval_steps_per_second': 22.242, 'epoch': 1.78}
+{'eval_loss': 0.09236248582601547, 'eval_accuracy': 1.0, 'eval_runtime': 0.0606, 'eval_samples_per_second': 247.332, 'eval_steps_per_second': 32.978, 'epoch': 2.67}
+{'train_runtime': 69.2309, 'train_samples_per_second': 3.033, 'train_steps_per_second': 0.087, 'train_loss': 0.34036485354105633, 'epoch': 2.67}
+100%|██████████| 2/2 [00:00<00:00, 93.34it/s]
+{'eval_loss': 0.09890136122703552, 'eval_accuracy': 1.0, 'eval_runtime': 0.0503, 'eval_samples_per_second': 298.458, 'eval_steps_per_second': 39.794, 'epoch': 2.6666666666666665}
diff --git a/wandb/run-20250504_163644-j17n0z1w/files/requirements.txt b/wandb/run-20250504_163644-j17n0z1w/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_163644-j17n0z1w/files/wandb-metadata.json b/wandb/run-20250504_163644-j17n0z1w/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..661f0244432fdb8428ff70df9987a780c88edab2
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T13:36:44.683493Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274930868224"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746624978",
+    "job_gid":  "11636",
+    "job_gpus":  "1",
+    "job_id":  "1027956",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746365778",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027956",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3183359",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_163644-j17n0z1w/files/wandb-summary.json b/wandb/run-20250504_163644-j17n0z1w/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff8192ac678b3079d8ae2b540b83b8069023ccca
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/files/wandb-summary.json
@@ -0,0 +1 @@
+{"eval/loss":0.09890136122703552,"_timestamp":1.746365889900344e+09,"train_steps_per_second":0.087,"train/epoch":2.6666666666666665,"eval/accuracy":1,"_wandb":{"runtime":90},"total_flos":0,"train_samples_per_second":3.033,"eval/samples_per_second":298.458,"eval/steps_per_second":39.794,"train_runtime":69.2309,"eval/runtime":0.0503,"train_loss":0.34036485354105633,"_step":4,"train/global_step":6,"_runtime":85.217340117}
\ No newline at end of file
diff --git a/wandb/run-20250504_163644-j17n0z1w/logs/debug-core.log b/wandb/run-20250504_163644-j17n0z1w/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..740d60d9f71c8c587518099b5e357d43e8786f46
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T16:36:43.800622213+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpudnn84p2/port-3183386.txt","pid":3183386,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T16:36:43.800675477+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T16:36:43.801533455+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3183386}
+{"time":"2025-05-04T16:36:43.801429105+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34585,"Zone":""}}
+{"time":"2025-05-04T16:36:43.98511968+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:36:44.686997088+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"j17n0z1w","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:36:44.811113202+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"j17n0z1w","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:38:15.462653307+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:38:15.462760405+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:38:15.462866235+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-04T16:38:15.462928073+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:38:16.450021056+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:38:16.450050764+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42446"}
+{"time":"2025-05-04T16:38:16.450073997+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_163644-j17n0z1w/logs/debug-internal.log b/wandb/run-20250504_163644-j17n0z1w/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..702df31c286faaf6c26783e1a598776dc6727960
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/logs/debug-internal.log
@@ -0,0 +1,19 @@
+{"time":"2025-05-04T16:36:44.68958036+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T16:36:44.68962696+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_163644-j17n0z1w/logs/debug-core.log"}
+{"time":"2025-05-04T16:36:44.811045191+03:00","level":"INFO","msg":"created new stream","id":"j17n0z1w"}
+{"time":"2025-05-04T16:36:44.81110033+03:00","level":"INFO","msg":"stream: started","id":"j17n0z1w"}
+{"time":"2025-05-04T16:36:44.811127326+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"j17n0z1w"}
+{"time":"2025-05-04T16:36:44.812597235+03:00","level":"INFO","msg":"handler: started","stream_id":"j17n0z1w"}
+{"time":"2025-05-04T16:36:44.812682202+03:00","level":"INFO","msg":"sender: started","stream_id":"j17n0z1w"}
+{"time":"2025-05-04T16:36:45.2302005+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T16:38:15.462763223+03:00","level":"INFO","msg":"stream: closing","id":"j17n0z1w"}
+{"time":"2025-05-04T16:38:15.462833186+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-04T16:38:15.463959432+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-04T16:38:15.653986013+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-04T16:38:15.654018889+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-04T16:38:15.654030152+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-04T16:38:16.194806616+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-04T16:38:16.449749922+03:00","level":"INFO","msg":"handler: closed","stream_id":"j17n0z1w"}
+{"time":"2025-05-04T16:38:16.449817209+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"j17n0z1w"}
+{"time":"2025-05-04T16:38:16.449847499+03:00","level":"INFO","msg":"sender: closed","stream_id":"j17n0z1w"}
+{"time":"2025-05-04T16:38:16.449922381+03:00","level":"INFO","msg":"stream: closed","id":"j17n0z1w"}
diff --git a/wandb/run-20250504_163644-j17n0z1w/logs/debug.log b/wandb/run-20250504_163644-j17n0z1w/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..d737c7f055847a85314b84c35816a14c7b1b12cf
--- /dev/null
+++ b/wandb/run-20250504_163644-j17n0z1w/logs/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 16:36:44,676 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 16:36:44,676 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Configure stats pid to 3183386
+2025-05-04 16:36:44,676 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_163644-j17n0z1w/logs/debug.log
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_163644-j17n0z1w/logs/debug-internal.log
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_init.py:init():619] calling init triggers
+2025-05-04 16:36:44,677 INFO    MainThread:3183386 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 16:36:44,678 INFO    MainThread:3183386 [wandb_init.py:init():669] starting backend
+2025-05-04 16:36:44,678 INFO    MainThread:3183386 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 16:36:44,682 INFO    MainThread:3183386 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 16:36:44,683 INFO    MainThread:3183386 [wandb_init.py:init():686] backend started and connected
+2025-05-04 16:36:44,690 INFO    MainThread:3183386 [wandb_init.py:init():781] updated telemetry
+2025-05-04 16:36:44,693 INFO    MainThread:3183386 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 16:36:45,217 INFO    MainThread:3183386 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 16:36:46,645 INFO    MainThread:3183386 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 16:36:46,645 INFO    MainThread:3183386 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 16:36:46,645 INFO    MainThread:3183386 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 16:36:46,645 INFO    MainThread:3183386 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 16:36:46,651 INFO    MainThread:3183386 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 16:37:00,590 INFO    MainThread:3183386 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-36-51_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-04 16:38:15,463 WARNING MsgRouterThr:3183386 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_163644-j17n0z1w/run-j17n0z1w.wandb b/wandb/run-20250504_163644-j17n0z1w/run-j17n0z1w.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..dd24d8341cb77819cc99d2fe0367ee037f5874e8
Binary files /dev/null and b/wandb/run-20250504_163644-j17n0z1w/run-j17n0z1w.wandb differ
diff --git a/wandb/run-20250504_172503-0ictlmwf/files/config.yaml b/wandb/run-20250504_172503-0ictlmwf/files/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90819acdbf6ef774fd8d0e6d0cd98145d182ec2b
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/files/config.yaml
@@ -0,0 +1,375 @@
+_wandb:
+    value:
+        cli_version: 0.18.7
+        m:
+            - "1": eval/runtime
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/grad_norm
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/learning_rate
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/accuracy
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/steps_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.10.15
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "2":
+                - 1
+                - 2
+                - 3
+                - 5
+                - 6
+                - 11
+                - 12
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 98
+                - 105
+            "3":
+                - 7
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.10.15
+            "5": 0.18.7
+            "6": 4.45.2
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.7
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+auto_find_batch_size:
+    value: false
+batch_eval_metrics:
+    value: false
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_train:
+    value: false
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: null
+eval_strategy:
+    value: epoch
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: epoch
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+gradient_accumulation_steps:
+    value: 4
+gradient_checkpointing:
+    value: false
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+ignore_data_skip:
+    value: false
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+learning_rate:
+    value: 5e-05
+length_column_name:
+    value: length
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: t5-bc-out/runs/May04_17-25-43_kolyoz1
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 500
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+max_grad_norm:
+    value: 1
+max_steps:
+    value: -1
+metric_for_best_model:
+    value: loss
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_dir:
+    value: t5-bc-out
+overwrite_output_dir:
+    value: false
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 8
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+prediction_loss_only:
+    value: false
+push_to_hub:
+    value: false
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_unused_columns:
+    value: true
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+run_name:
+    value: t5-bc-out
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: false
+save_steps:
+    value: 500
+save_strategy:
+    value: epoch
+save_total_limit:
+    value: null
+seed:
+    value: 42
+skip_memory_metrics:
+    value: true
+split_batches:
+    value: null
+tf32:
+    value: null
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 0
+weight_decay:
+    value: 0
diff --git a/wandb/run-20250504_172503-0ictlmwf/files/output.log b/wandb/run-20250504_172503-0ictlmwf/files/output.log
new file mode 100644
index 0000000000000000000000000000000000000000..f279e4f7e0075186a5bddae1ec00f2da2afeb33d
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/files/output.log
@@ -0,0 +1,110 @@
+You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
+Map: 100%|██████████| 511104/511104 [00:29<00:00, 17366.65 examples/s]
+Map: 100%|██████████| 109522/109522 [00:04<00:00, 26402.34 examples/s]
+/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
+  warnings.warn(
+[2025-05-04 17:25:48,879] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
+ 33%|███▎      | 15972/47916 [2:23:32<4:53:49,  1.81it/s]
+{'loss': 0.5856, 'grad_norm': 1.3348039388656616, 'learning_rate': 4.947825361048502e-05, 'epoch': 0.03}
+{'loss': 0.5183, 'grad_norm': 2.473144292831421, 'learning_rate': 4.8956507220970036e-05, 'epoch': 0.06}
+{'loss': 0.4879, 'grad_norm': 3.6210598945617676, 'learning_rate': 4.843476083145505e-05, 'epoch': 0.09}
+{'loss': 0.4579, 'grad_norm': 6.336288928985596, 'learning_rate': 4.791405793471909e-05, 'epoch': 0.13}
+{'loss': 0.4421, 'grad_norm': 2.6699299812316895, 'learning_rate': 4.739231154520411e-05, 'epoch': 0.16}
+{'loss': 0.4205, 'grad_norm': 7.918868064880371, 'learning_rate': 4.6870565155689124e-05, 'epoch': 0.19}
+{'loss': 0.4044, 'grad_norm': 2.9816083908081055, 'learning_rate': 4.634881876617414e-05, 'epoch': 0.22}
+{'loss': 0.3901, 'grad_norm': 7.581803321838379, 'learning_rate': 4.582707237665916e-05, 'epoch': 0.25}
+{'loss': 0.3834, 'grad_norm': 6.031352996826172, 'learning_rate': 4.5305325987144174e-05, 'epoch': 0.28}
+{'loss': 0.3601, 'grad_norm': 2.581623077392578, 'learning_rate': 4.478357959762919e-05, 'epoch': 0.31}
+{'loss': 0.3492, 'grad_norm': 4.7024245262146, 'learning_rate': 4.42618332081142e-05, 'epoch': 0.34}
+{'loss': 0.3435, 'grad_norm': 8.929915428161621, 'learning_rate': 4.374217380415728e-05, 'epoch': 0.38}
+{'loss': 0.3366, 'grad_norm': 3.694370985031128, 'learning_rate': 4.32204274146423e-05, 'epoch': 0.41}
+{'loss': 0.3259, 'grad_norm': 5.6961350440979, 'learning_rate': 4.2698681025127307e-05, 'epoch': 0.44}
+{'loss': 0.3224, 'grad_norm': 2.740339756011963, 'learning_rate': 4.217693463561232e-05, 'epoch': 0.47}
+{'loss': 0.3103, 'grad_norm': 3.7285494804382324, 'learning_rate': 4.165518824609734e-05, 'epoch': 0.5}
+{'loss': 0.3107, 'grad_norm': 5.1480326652526855, 'learning_rate': 4.1133441856582356e-05, 'epoch': 0.53}
+{'loss': 0.2945, 'grad_norm': 4.8817620277404785, 'learning_rate': 4.0611695467067366e-05, 'epoch': 0.56}
+{'loss': 0.2903, 'grad_norm': 5.003459453582764, 'learning_rate': 4.008994907755238e-05, 'epoch': 0.59}
+{'loss': 0.284, 'grad_norm': 6.451533317565918, 'learning_rate': 3.95682026880374e-05, 'epoch': 0.63}
+{'loss': 0.276, 'grad_norm': 7.442136287689209, 'learning_rate': 3.9046456298522416e-05, 'epoch': 0.66}
+{'loss': 0.27, 'grad_norm': 3.617513656616211, 'learning_rate': 3.852575340178646e-05, 'epoch': 0.69}
+{'loss': 0.2666, 'grad_norm': 5.776317596435547, 'learning_rate': 3.800400701227148e-05, 'epoch': 0.72}
+{'loss': 0.257, 'grad_norm': 6.264099597930908, 'learning_rate': 3.7482260622756494e-05, 'epoch': 0.75}
+{'loss': 0.2566, 'grad_norm': 4.222651481628418, 'learning_rate': 3.6960514233241504e-05, 'epoch': 0.78}
+{'loss': 0.2502, 'grad_norm': 6.953704833984375, 'learning_rate': 3.643876784372652e-05, 'epoch': 0.81}
+{'loss': 0.2364, 'grad_norm': 3.2264351844787598, 'learning_rate': 3.591806494699057e-05, 'epoch': 0.85}
+{'loss': 0.2451, 'grad_norm': 6.233669281005859, 'learning_rate': 3.539631855747558e-05, 'epoch': 0.88}
+{'loss': 0.2364, 'grad_norm': 8.540342330932617, 'learning_rate': 3.48745721679606e-05, 'epoch': 0.91}
+{'loss': 0.2312, 'grad_norm': 4.3881516456604, 'learning_rate': 3.4352825778445616e-05, 'epoch': 0.94}
+{'loss': 0.2323, 'grad_norm': 6.7153167724609375, 'learning_rate': 3.383107938893063e-05, 'epoch': 0.97}
+                                                     
+{'eval_loss': 0.2026778757572174, 'eval_accuracy': 0.9204725991125071, 'eval_runtime': 180.0542, 'eval_samples_per_second': 608.272, 'eval_steps_per_second': 76.038, 'epoch': 1.0}
+{'loss': 0.2163, 'grad_norm': 4.329936504364014, 'learning_rate': 3.331037649219468e-05, 'epoch': 1.0}
+{'loss': 0.139, 'grad_norm': 8.806492805480957, 'learning_rate': 3.278863010267969e-05, 'epoch': 1.03}
+{'loss': 0.1419, 'grad_norm': 9.733407020568848, 'learning_rate': 3.226688371316471e-05, 'epoch': 1.06}
+{'loss': 0.1361, 'grad_norm': 3.5503616333007812, 'learning_rate': 3.174513732364972e-05, 'epoch': 1.1}
+{'loss': 0.1398, 'grad_norm': 5.853847503662109, 'learning_rate': 3.122339093413474e-05, 'epoch': 1.13}
+{'loss': 0.1373, 'grad_norm': 1.6936904191970825, 'learning_rate': 3.0701644544619754e-05, 'epoch': 1.16}
+{'loss': 0.1423, 'grad_norm': 1.5299335718154907, 'learning_rate': 3.017989815510477e-05, 'epoch': 1.19}
+{'loss': 0.1391, 'grad_norm': 3.899322986602783, 'learning_rate': 2.965815176558978e-05, 'epoch': 1.22}
+{'loss': 0.1408, 'grad_norm': 2.3118438720703125, 'learning_rate': 2.913744886885383e-05, 'epoch': 1.25}
+{'loss': 0.1408, 'grad_norm': 0.6930440068244934, 'learning_rate': 2.8615702479338845e-05, 'epoch': 1.28}
+{'loss': 0.1404, 'grad_norm': 2.851909875869751, 'learning_rate': 2.8093956089823858e-05, 'epoch': 1.31}
+{'loss': 0.1382, 'grad_norm': 0.22848767042160034, 'learning_rate': 2.7572209700308875e-05, 'epoch': 1.35}
+{'loss': 0.1396, 'grad_norm': 3.973886489868164, 'learning_rate': 2.7050463310793888e-05, 'epoch': 1.38}
+{'loss': 0.127, 'grad_norm': 3.140080451965332, 'learning_rate': 2.6529760414057936e-05, 'epoch': 1.41}
+{'loss': 0.1276, 'grad_norm': 5.468123435974121, 'learning_rate': 2.6008014024542953e-05, 'epoch': 1.44}
+{'loss': 0.1219, 'grad_norm': 0.626640260219574, 'learning_rate': 2.5486267635027966e-05, 'epoch': 1.47}
+{'loss': 0.1319, 'grad_norm': 3.1899547576904297, 'learning_rate': 2.496452124551298e-05, 'epoch': 1.5}
+{'loss': 0.1298, 'grad_norm': 3.199150562286377, 'learning_rate': 2.4442774855997996e-05, 'epoch': 1.53}
+{'loss': 0.1217, 'grad_norm': 5.129565715789795, 'learning_rate': 2.3921028466483013e-05, 'epoch': 1.57}
+{'loss': 0.1288, 'grad_norm': 4.223311424255371, 'learning_rate': 2.339928207696803e-05, 'epoch': 1.6}
+{'loss': 0.1263, 'grad_norm': 10.741965293884277, 'learning_rate': 2.2877535687453046e-05, 'epoch': 1.63}
+{'loss': 0.122, 'grad_norm': 3.0217132568359375, 'learning_rate': 2.235578929793806e-05, 'epoch': 1.66}
+{'loss': 0.122, 'grad_norm': 7.847172737121582, 'learning_rate': 2.1835086401202104e-05, 'epoch': 1.69}
+{'loss': 0.1266, 'grad_norm': 9.223713874816895, 'learning_rate': 2.1313340011687117e-05, 'epoch': 1.72}
+{'loss': 0.1274, 'grad_norm': 2.0706963539123535, 'learning_rate': 2.0791593622172137e-05, 'epoch': 1.75}
+{'loss': 0.1214, 'grad_norm': 3.1475393772125244, 'learning_rate': 2.0270890725436182e-05, 'epoch': 1.78}
+{'loss': 0.1191, 'grad_norm': 3.7348415851593018, 'learning_rate': 1.9749144335921196e-05, 'epoch': 1.82}
+{'loss': 0.1199, 'grad_norm': 3.230713129043579, 'learning_rate': 1.9227397946406212e-05, 'epoch': 1.85}
+{'loss': 0.1176, 'grad_norm': 0.4691683351993561, 'learning_rate': 1.8705651556891226e-05, 'epoch': 1.88}
+{'loss': 0.1176, 'grad_norm': 4.382262706756592, 'learning_rate': 1.8183905167376242e-05, 'epoch': 1.91}
+{'loss': 0.1083, 'grad_norm': 9.810182571411133, 'learning_rate': 1.7662158777861255e-05, 'epoch': 1.94}
+{'loss': 0.1103, 'grad_norm': 8.107538223266602, 'learning_rate': 1.7140412388346275e-05, 'epoch': 1.97}
+{'eval_loss': 0.1829579919576645, 'eval_accuracy': 0.9478369642628878, 'eval_runtime': 179.9731, 'eval_samples_per_second': 608.547, 'eval_steps_per_second': 76.072, 'epoch': 2.0}
+{'loss': 0.1087, 'grad_norm': 0.5452843308448792, 'learning_rate': 1.661866599883129e-05, 'epoch': 2.0}
+{'loss': 0.0456, 'grad_norm': 1.0569943189620972, 'learning_rate': 1.6097963102095334e-05, 'epoch': 2.03}
+{'loss': 0.0523, 'grad_norm': 0.22022764384746552, 'learning_rate': 1.557621671258035e-05, 'epoch': 2.07}
+{'loss': 0.0492, 'grad_norm': 9.75222396850586, 'learning_rate': 1.5054470323065365e-05, 'epoch': 2.1}
+{'loss': 0.0498, 'grad_norm': 3.1281306743621826, 'learning_rate': 1.453272393355038e-05, 'epoch': 2.13}
+{'loss': 0.0506, 'grad_norm': 0.012396792881190777, 'learning_rate': 1.4012021036814427e-05, 'epoch': 2.16}
+{'loss': 0.0569, 'grad_norm': 6.527154922485352, 'learning_rate': 1.3490274647299442e-05, 'epoch': 2.19}
+{'loss': 0.0548, 'grad_norm': 3.5429670810699463, 'learning_rate': 1.2968528257784457e-05, 'epoch': 2.22}
+{'loss': 0.0558, 'grad_norm': 1.333369255065918, 'learning_rate': 1.2446781868269472e-05, 'epoch': 2.25}
+{'loss': 0.0464, 'grad_norm': 0.10260029882192612, 'learning_rate': 1.1926078971533518e-05, 'epoch': 2.29}
+{'loss': 0.0515, 'grad_norm': 0.14060164988040924, 'learning_rate': 1.1404332582018533e-05, 'epoch': 2.32}
+{'loss': 0.0448, 'grad_norm': 1.031032919883728, 'learning_rate': 1.0882586192503548e-05, 'epoch': 2.35}
+{'loss': 0.0475, 'grad_norm': 0.20121368765830994, 'learning_rate': 1.0360839802988565e-05, 'epoch': 2.38}
+{'loss': 0.0522, 'grad_norm': 0.06531311571598053, 'learning_rate': 9.84013690625261e-06, 'epoch': 2.41}
+{'loss': 0.0434, 'grad_norm': 0.04498385637998581, 'learning_rate': 9.318390516737625e-06, 'epoch': 2.44}
+{'loss': 0.0468, 'grad_norm': 0.3482716679573059, 'learning_rate': 8.796644127222641e-06, 'epoch': 2.47}
+{'loss': 0.0505, 'grad_norm': 4.0475053787231445, 'learning_rate': 8.274897737707656e-06, 'epoch': 2.5}
+{'loss': 0.0421, 'grad_norm': 0.6960127353668213, 'learning_rate': 7.753151348192671e-06, 'epoch': 2.54}
+{'loss': 0.0451, 'grad_norm': 0.8902493119239807, 'learning_rate': 7.231404958677686e-06, 'epoch': 2.57}
+{'loss': 0.0522, 'grad_norm': 0.46462351083755493, 'learning_rate': 6.710702061941732e-06, 'epoch': 2.6}
+{'loss': 0.0468, 'grad_norm': 0.07463126629590988, 'learning_rate': 6.1889556724267476e-06, 'epoch': 2.63}
+{'loss': 0.0429, 'grad_norm': 0.05138092488050461, 'learning_rate': 5.6672092829117625e-06, 'epoch': 2.66}
+{'loss': 0.038, 'grad_norm': 0.06017659977078438, 'learning_rate': 5.145462893396778e-06, 'epoch': 2.69}
+{'loss': 0.0418, 'grad_norm': 3.794154405593872, 'learning_rate': 4.624759996660823e-06, 'epoch': 2.72}
+{'loss': 0.0418, 'grad_norm': 9.929149627685547, 'learning_rate': 4.103013607145838e-06, 'epoch': 2.75}
+{'loss': 0.0435, 'grad_norm': 0.10156802833080292, 'learning_rate': 3.5812672176308544e-06, 'epoch': 2.79}
+{'loss': 0.039, 'grad_norm': 15.590471267700195, 'learning_rate': 3.0595208281158697e-06, 'epoch': 2.82}
+{'loss': 0.0451, 'grad_norm': 0.1026441678404808, 'learning_rate': 2.5377744386008846e-06, 'epoch': 2.85}
+{'loss': 0.0408, 'grad_norm': 0.08782440423965454, 'learning_rate': 2.0160280490859004e-06, 'epoch': 2.88}
+{'loss': 0.0372, 'grad_norm': 17.5203857421875, 'learning_rate': 1.494281659570916e-06, 'epoch': 2.91}
+{'loss': 0.041, 'grad_norm': 0.08832889050245285, 'learning_rate': 9.735787628349612e-07, 'epoch': 2.94}
+{'loss': 0.0417, 'grad_norm': 10.057083129882812, 'learning_rate': 4.518323733199766e-07, 'epoch': 2.97}
+{'eval_loss': 0.2335142344236374, 'eval_accuracy': 0.9541735906941071, 'eval_runtime': 176.4196, 'eval_samples_per_second': 620.804, 'eval_steps_per_second': 77.605, 'epoch': 3.0}
+{'train_runtime': 26437.5455, 'train_samples_per_second': 57.998, 'train_steps_per_second': 1.812, 'train_loss': 0.1687752874718155, 'epoch': 3.0}
+100%|██████████| 13691/13691 [02:53<00:00, 78.95it/s]
+{'eval_loss': 0.17655357718467712, 'eval_accuracy': 0.9493257124074396, 'eval_runtime': 173.4293, 'eval_samples_per_second': 631.514, 'eval_steps_per_second': 78.943, 'epoch': 3.0}
diff --git a/wandb/run-20250504_172503-0ictlmwf/files/requirements.txt b/wandb/run-20250504_172503-0ictlmwf/files/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..847c45ecccb522de294762faeeb01fe5fb02f7ac
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/files/requirements.txt
@@ -0,0 +1,541 @@
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+pyg-lib==0.4.0+pt20cu117
+biopython==1.85
+iniconfig==2.0.0
+tokenizers==0.20.0
+accelerate==1.3.0
+torch==2.6.0
+nvidia-nccl-cu12==2.21.5
+transformers==4.45.2
+nvidia-cusparse-cu12==12.3.1.170
+torch-scatter==2.1.2+pt20cu117
+nvidia-cusparselt-cu12==0.6.2
+nvidia-nvtx-cu12==12.4.127
+zstd==1.5.6.6
+fair-esm==2.0.0
+omegaconf==2.3.0
+pluggy==1.5.0
+pytest==8.3.5
+nvidia-curand-cu12==10.3.5.147
+nvidia-cufft-cu12==11.2.1.3
+torch-cluster==1.6.3+pt20cu117
+regex==2024.9.11
+nvidia-cudnn-cu12==9.1.0.70
+torch-spline-conv==1.2.2+pt20cu117
+nvidia-cusolver-cu12==11.6.1.9
+antlr4-python3-runtime==4.9.3
+msgpack-numpy==0.4.8
+nlp==0.2.0
+einops==0.8.1
+nvidia-cublas-cu12==12.4.5.8
+triton==3.2.0
+ninja==1.11.1.3
+hydra-core==1.3.2
+nvidia-nvjitlink-cu12==12.4.127
+biotite==0.41.2
+torch-sparse==0.6.18+pt20cu117
+esm==3.1.4
+sympy==1.13.1
+nvidia-cuda-runtime-cu12==12.4.127
+jupyter-lsp==2.2.5
+jupyter-events==0.10.0
+ipykernel==6.29.5
+Mako==1.3.5
+proto-plus==1.25.0
+fst-pso==1.8.1
+gensim==4.3.3
+htmlmin==0.1.12
+tokenizers==0.13.3
+timm==1.0.11
+MarkupSafe==3.0.2
+safetensors==0.4.5
+requests==2.32.3
+gast==0.5.5
+cuml==24.12.0a33
+jaxlib==0.4.23.dev20240214
+spacy-loggers==1.0.5
+pytz==2024.1
+idna==3.10
+python-dateutil==2.9.0
+mdurl==0.1.2
+blis==0.7.10
+jupyter==1.1.1
+pyerfa==2.0.1.5
+comm==0.2.2
+pygraphviz==1.14
+dill==0.3.8
+paramiko==3.5.0
+llama-index==0.8.36
+mdit-py-plugins==0.4.2
+Werkzeug==3.1.3
+pyu2f==0.1.5
+dask-glm==0.2.0
+httpx==0.27.2
+typeguard==4.4.1
+mypy-extensions==1.0.0
+kmodes==0.12.2
+keras==2.15.0
+ydata-profiling==0.0.dev0
+regex==2024.11.6
+xarray==2024.11.0
+setuptools==75.3.0
+charset-normalizer==3.4.0
+jupyterlab_nvdashboard==0.11.0
+pylibraft==24.12.0a36
+spacy==3.7.6
+mlflow-skinny==2.17.2
+nvtx==0.2.10
+multimethod==1.12
+pexpect==4.9.0
+torch==2.1.0.post301
+flatbuffers==24.3.25
+python-json-logger==2.0.7
+PyJWT==2.9.0
+multiprocess==0.70.16
+colorlover==0.3.0
+yarl==1.16.0
+locket==1.0.0
+patsy==1.0.0
+rapids-dask-dependency==24.12.0a0
+stanza==1.9.2
+debugpy==1.8.8
+jupyterlab_pygments==0.3.0
+pylibcudf==24.12.0a337
+lz4==4.3.3
+pandas==2.2.3
+tifffile==2024.9.20
+pynvml==11.4.1
+cufflinks==0.17.3
+ipywidgets==8.1.5
+requests-oauthlib==2.0.0
+google-auth-oauthlib==1.2.1
+rsa==4.9
+webcolors==24.8.0
+jsonschema-specifications==2024.10.1
+scikit-learn==1.5.2
+langchain-text-splitters==0.3.2
+pandas-datareader==0.10.0
+tomli==2.0.2
+tzdata==2024.2
+scikit-image==0.24.0
+tensorboard_data_server==0.7.0
+kiwisolver==1.4.7
+cloudpathlib==0.20.0
+isodate==0.6.1
+adversarial-robustness-toolbox==1.19.1
+SQLAlchemy==2.0.36
+pytest-runner==6.0.0
+pycairo==1.27.0
+treelite==4.3.0
+jiter==0.7.0
+threadpoolctl==3.5.0
+pandocfilters==1.5.0
+loguru==0.7.2
+smart_open==7.0.5
+shellingham==1.5.4
+deepspeed==0.15.4
+prompt_toolkit==3.0.48
+databricks-sdk==0.34.0
+langchain-core==0.3.15
+imageio==2.36.0
+openapi-schema-pydantic==1.2.4
+zict==3.0.0
+cachetools==5.5.0
+colorful==0.5.6
+mpmath==1.3.0
+nest_asyncio==1.6.0
+pyFUME==0.2.25
+opencv-python-headless==4.9.0
+fastai==2.7.18
+importlib_resources==6.4.5
+binaryornot==0.4.4
+evaluate==0.4.1
+matplotlib-inline==0.1.7
+wasabi==1.1.2
+pycparser==2.22
+GitPython==3.1.43
+pluggy==1.5.0
+async-lru==2.0.4
+pgmpy==0.1.24
+anyio==4.4.0
+executing==2.1.0
+orjson==3.10.11
+humanfriendly==10.0
+tornado==6.4.1
+gmpy2==2.1.5
+rlPyCairo==0.2.0
+distributed==2024.11.0
+FuzzyTM==2.0.5
+torchtext==0.15.2a0+5ce3163
+pytest==8.3.5
+pyod==2.0.2
+ImageHash==4.3.1
+soupsieve==2.5
+tblib==3.0.0
+emoji==2.14.0
+aiohappyeyeballs==2.4.3
+uri-template==1.3.0
+tensorflow_estimator==2.15.0
+babel==2.16.0
+dask-cuda==24.12.0a12
+overrides==7.7.0
+opencensus==0.11.3
+openai==0.28.1
+language_data==1.2.0
+jedi==0.19.2
+cookiecutter==2.6.0
+entrypoints==0.4
+exceptiongroup==1.2.2
+marisa-trie==1.2.0
+uvloop==0.20.0
+aiosignal==1.3.1
+Flask==3.0.3
+tensorboard==2.15.2
+cffi==1.17.1
+tf_keras==2.15.0
+absl-py==2.1.0
+blinker==1.9.0
+types-python-dateutil==2.9.0.20241003
+opencv-python==4.9.0
+frozendict==2.4.6
+aiohttp-cors==0.7.0
+statsmodels==0.14.4
+tinycss2==1.4.0
+terminado==0.18.1
+pycaret==2.2.3
+aiohttp==3.10.10
+distributed-ucxx==0.41.0
+prometheus_client==0.21.0
+fastdownload==0.0.7
+grpcio==1.59.3
+google-api-core==2.22.0
+jupyterlab_widgets==3.0.13
+appdirs==1.4.4
+littleutils==0.0.0
+ray==2.24.0
+kaggle==1.6.17
+jsonschema==4.23.0
+google-auth==2.36.0
+scikit-base==0.11.0
+visions==0.7.6
+pyarrow==15.0.0
+transformers==4.33.0
+prometheus_flask_exporter==0.23.1
+dm-tree==0.1.8
+colorama==0.4.6
+requests-toolbelt==1.0.0
+cached-property==1.5.2
+cymem==2.0.8
+PyNaCl==1.5.0
+PyWavelets==1.7.0
+httptools==0.6.1
+typing-utils==0.1.0
+email_validator==2.2.0
+marshmallow==3.23.1
+Deprecated==1.2.14
+virtualenv==20.4.7
+optuna==3.6.1
+jupyter_server==2.14.2
+termcolor==2.5.0
+mpi4py==4.0.1
+torchdata==0.7.1+8cea82f
+dataclasses==0.8
+cloudpickle==3.1.0
+tree_sitter_languages==1.10.2
+tabulate==0.9.0
+ipython==8.29.0
+lightgbm==4.3.0
+captum==0.6.0
+confuse==2.0.1
+torchvision==0.16.1+adc3221
+lxml==4.9.4
+fastapi==0.115.4
+python-multipart==0.0.17
+dnspython==2.7.0
+jupyter-console==6.6.3
+preshed==3.0.9
+py-cpuinfo==9.0.0
+Send2Trash==1.8.3
+murmurhash==1.0.10
+sniffio==1.3.1
+websockets==13.1
+h11==0.14.0
+smmap==5.0.0
+textual==0.85.2
+jsonpatch==1.33
+opencensus-context==0.1.3
+nbconvert==7.16.4
+sentry-sdk==2.19.0
+opentelemetry-semantic-conventions==0.37b0
+pandas-profiling==2.8.0
+pillow==10.3.0
+peft==0.13.2
+rpds-py==0.21.0
+bokeh==3.6.1
+distro==1.9.0
+itsdangerous==2.2.0
+wandb==0.18.7
+jsonpointer==3.0.0
+astropy-iers-data==0.2024.11.11.0.32.38
+horovod==0.28.1
+graphviz==0.20.3
+vtk==9.3.1
+bleach==6.2.0
+numexpr==2.8.7
+pydantic_core==2.23.4
+Jinja2==3.1.4
+widgetsnbextension==4.0.13
+filelock==3.16.1
+catboost==1.2.7
+raft-dask==24.12.0a36
+async-timeout==4.0.3
+datefinder==0.7.3
+coloredlogs==15.0.1
+platformdirs==4.3.6
+spacy-legacy==3.0.12
+chardet==5.2.0
+jupyter_client==8.6.3
+importlib_metadata==8.5.0
+rfc3986-validator==0.1.1
+huggingface_hub==0.26.2
+PySocks==1.7.1
+mlxtend==0.23.2
+outdated==0.2.2
+partd==1.4.2
+thinc==8.2.5
+astropy==6.1.6
+rdflib==6.3.2
+h2==4.1.0
+typer==0.13.0
+xyzservices==2024.9.0
+toolz==0.12.1
+frozenlist==1.5.0
+rdkit==2024.9.2
+pyasn1==0.6.1
+jupyter_server_terminals==0.5.3
+ucx-py==0.41.0a11
+astunparse==1.6.3
+simpful==2.12.0
+notebook_shim==0.2.4
+scipy==1.13.1
+colorlog==6.9.0
+tiktoken==0.3.3
+plotly==5.24.1
+fastrlock==0.8.2
+chart-studio==1.1.0
+stack-data==0.6.2
+google-pasta==0.2.0
+sktime==0.34.0
+PyYAML==6.0.2
+sympy==1.13.3
+multidict==6.1.0
+ml-dtypes==0.2.0
+tensorboardX==2.6.2.2
+decorator==5.1.1
+cytoolz==1.0.0
+ase==3.23.0
+isoduration==20.11.0
+html5lib==1.1
+langsmith==0.1.142
+future==1.0.0
+onnx2torch==1.5.15
+multipledispatch==0.6.0
+protobuf==4.24.4
+ucxx==0.41.0
+pandas_flavor==0.6.0
+msgpack==1.1.0
+pyasn1_modules==0.4.1
+imagecodecs==2024.1.1
+mlflow==2.17.2
+watchfiles==0.24.0
+dm-sonnet==2.0.2
+langcodes==3.4.1
+freetype-py==2.3.0
+argon2-cffi-bindings==21.2.0
+trimesh==4.5.2
+opt_einsum==3.4.0
+tenacity==8.5.0
+h5py==3.12.1
+fastapi-cli==0.0.5
+oauthlib==3.2.2
+parso==0.8.4
+weasel==0.4.1
+yfinance==0.2.49
+networkx==2.8.8
+bitsandbytes==0.44.1
+lazy_loader==0.4
+querystring_parser==1.2.4
+contourpy==1.3.0
+unicodedata2==15.1.0
+bcrypt==4.2.0
+munkres==1.1.4
+langchain==0.0.298
+hpack==4.0.0
+cryptography==43.0.3
+umap-learn==0.5.7
+arrow==1.3.0
+docker==7.1.0
+certifi==2025.1.31
+fastjsonschema==2.20.0
+tensorflow==2.15.0
+googleapis-common-protos==1.65.0
+iniconfig==2.0.0
+Markdown==3.6
+llvmlite==0.43.0
+wslink==2.3.2
+attrs==24.2.0
+rich==13.9.4
+cupy==13.3.0
+uc-micro-py==1.0.3
+alembic==1.14.0
+joblib==1.4.2
+reportlab==4.2.5
+miniful==0.0.6
+jupyter_core==5.7.2
+wheel==0.45.0
+phik==0.12.3
+mistune==3.0.2
+wcwidth==0.2.13
+dacite==1.8.1
+accelerate==0.22.0
+sacremoses==0.0.53
+revtok==0.0.3
+python-slugify==8.0.4
+tangled-up-in-unicode==0.2.0
+dask==2024.11.0
+markdown-it-py==3.0.0
+sentencepiece==0.1.99
+beautifulsoup4==4.12.3
+six==1.16.0
+numba-cuda==0.0.17
+argon2-cffi==23.1.0
+xxhash==3.5.0
+hjson==3.1.0
+fonttools==4.54.1
+graphql-core==3.2.5
+pyparsing==3.2.0
+pure_eval==0.2.3
+distlib==0.3.9
+lightning==2.4.0
+wordcloud==0.0.0
+catalogue==2.0.10
+jax==0.4.27
+tree-sitter==0.23.2
+notebook==7.2.2
+dataclasses-json==0.6.7
+propcache==0.2.0
+numba==0.60.0
+dask-expr==1.1.17
+pydantic==2.9.2
+gunicorn==22.0.0
+missingno==0.5.2
+pyOpenSSL==24.2.1
+openpyxl==3.1.5
+packaging==24.1
+python-dotenv==1.0.1
+cycler==0.12.1
+types-pytz==2024.2.0.20241003
+yellowbrick==1.5
+referencing==0.35.1
+pyLDAvis==3.4.1
+lazypredict==0.2.16
+fqdn==1.5.1
+websocket-client==1.8.0
+fastcore==1.7.19
+pynvjitlink-cu12==0.3.0
+pingouin==0.5.5
+numpy==1.26.4
+typing-inspect==0.9.0
+nltk==3.9.1
+onnxruntime==1.19.2
+tensorflow-probability==0.23.0
+datasets==3.0.2
+pickleshare==0.7.5
+peewee==3.17.7
+torch-geometric==2.6.1
+ptyprocess==0.7.0
+greenlet==3.1.1
+graphql-relay==3.2.0
+graphene==3.4.3
+et_xmlfile==2.0.0
+webencodings==0.5.1
+hyperframe==6.0.1
+multitasking==0.0.9
+typer-slim==0.13.0
+onnx==1.15.0
+uvicorn==0.32.0
+memray==1.13.4
+xgboost==2.1.2
+Brotli==1.1.0
+zipp==3.21.0
+nbformat==5.10.4
+responses==0.18.0
+funcy==2.0
+Pygments==2.18.0
+tqdm==4.67.0
+linkify-it-py==2.0.3
+srsly==2.4.8
+cuda-python==12.6.0
+lightning-utilities==0.11.8
+cudf==24.12.0a337
+dask-ml==2024.4.4
+docker-pycreds==0.4.0
+pkgutil_resolve_name==1.3.10
+opentelemetry-api==1.16.0
+fsspec==2024.9.0
+nbclient==0.10.0
+psutil==5.9.8
+pytorch-lightning==2.4.0
+sortedcontainers==2.4.0
+matplotlib==3.9.2
+defusedxml==0.7.1
+urllib3==1.26.19
+jupyterlab_server==2.27.3
+retrying==1.3.3
+dask-cudf==24.12.0a337
+sqlparse==0.5.1
+text-unidecode==1.3
+seaborn==0.13.2
+typing_extensions==4.12.2
+pyzmq==26.2.0
+rfc3339-validator==0.1.4
+pynndescent==0.5.13
+pip==24.3.1
+confection==0.1.4
+wrapt==1.14.1
+fastprogress==1.0.3
+traitlets==5.14.3
+asttokens==2.4.1
+json5==0.9.28
+pandas-stubs==2.2.3.241126
+torchmetrics==1.2.1
+gitdb==4.0.11
+annotated-types==0.7.0
+ipython-autotime==0.1
+httpcore==1.0.6
+click==8.1.7
+setproctitle==1.3.3
+starlette==0.41.2
+jupyterlab==4.2.5
+rmm==24.12.0a27
+opentelemetry-sdk==1.16.0
+textblob==0.15.3
+imbalanced-learn==0.12.4
+typeguard==4.3.0
+more-itertools==10.3.0
+zipp==3.19.2
+autocommand==2.2.2
+jaraco.context==5.3.0
+packaging==24.1
+importlib_metadata==8.0.0
+platformdirs==4.2.2
+jaraco.functools==4.0.1
+importlib_resources==6.4.0
+tomli==2.0.1
+jaraco.text==3.12.1
+wheel==0.43.0
+jaraco.collections==5.1.0
+typing_extensions==4.12.2
+inflect==7.3.1
+backports.tarfile==1.2.0
diff --git a/wandb/run-20250504_172503-0ictlmwf/files/wandb-metadata.json b/wandb/run-20250504_172503-0ictlmwf/files/wandb-metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..f3c892eb99e110ed3a340c5c88fca4d4e7601345
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/files/wandb-metadata.json
@@ -0,0 +1,77 @@
+{
+  "os":  "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
+  "python":  "3.10.15",
+  "startedAt":  "2025-05-04T14:25:03.372176Z",
+  "program":  "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
+  "codePath":  "finetuning_bc_prott5.py",
+  "email":  "zeynep.isik1@sabanciuniv.edu",
+  "root":  "/arf/scratch/zisik/prott5_bc_ft",
+  "host":  "kolyoz1",
+  "username":  "zisik",
+  "executable":  "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
+  "codePathLocal":  "finetuning_bc_prott5.py",
+  "cpu_count":  64,
+  "cpu_count_logical":  64,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "7643995308032",
+      "used":  "274939207680"
+    }
+  },
+  "memory":  {
+    "total":  "1081373220864"
+  },
+  "cpu":  {
+    "count":  64,
+    "countLogical":  64
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper"
+    }
+  ],
+  "slurm":  {
+    "cluster_name":  "cuda",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "1",
+    "gtids":  "0",
+    "job_account":  "tbag154",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1746627878",
+    "job_gid":  "11636",
+    "job_gpus":  "3",
+    "job_id":  "1027971",
+    "job_name":  "msa_ph_pt",
+    "job_nodelist":  "kolyoz1",
+    "job_num_nodes":  "1",
+    "job_partition":  "kolyoz-cuda",
+    "job_qos":  "tbag",
+    "job_start_time":  "1746368678",
+    "job_uid":  "11636",
+    "job_user":  "zisik",
+    "jobid":  "1027971",
+    "localid":  "0",
+    "mem_per_cpu":  "14000",
+    "nnodes":  "1",
+    "node_aliases":  "(null)",
+    "nodeid":  "0",
+    "nodelist":  "kolyoz1",
+    "prio_process":  "0",
+    "procid":  "0",
+    "submit_dir":  "/arf/scratch/zisik",
+    "submit_host":  "cuda-ui",
+    "task_pid":  "3189684",
+    "tasks_per_node":  "1",
+    "topology_addr":  "kolyoz1",
+    "topology_addr_pattern":  "node",
+    "working_cluster":  "cuda:slurmcontroller3.ib:6800:9984:109"
+  },
+  "cudaVersion":  "12.6"
+}
\ No newline at end of file
diff --git a/wandb/run-20250504_172503-0ictlmwf/files/wandb-summary.json b/wandb/run-20250504_172503-0ictlmwf/files/wandb-summary.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d540fd3ad4df49061ca8c1d3c6754d4ef243041
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/files/wandb-summary.json
@@ -0,0 +1 @@
+{"train/epoch":3,"_step":99,"train/grad_norm":10.057083129882812,"train/learning_rate":4.518323733199766e-07,"eval/steps_per_second":78.943,"train/loss":0.0417,"train_steps_per_second":1.812,"_wandb":{"runtime":26669},"total_flos":0,"eval/loss":0.17655357718467712,"train_loss":0.1687752874718155,"train_runtime":26437.5455,"eval/accuracy":0.9493257124074396,"_timestamp":1.7463953681937246e+09,"train_samples_per_second":57.998,"eval/runtime":173.4293,"train/global_step":47916,"eval/samples_per_second":631.514,"_runtime":26664.822052477}
\ No newline at end of file
diff --git a/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log b/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log
new file mode 100644
index 0000000000000000000000000000000000000000..be6c40900e077df04434c2058af02e8279bfd761
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log
@@ -0,0 +1,14 @@
+{"time":"2025-05-04T17:25:02.471184127+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpiwo6benn/port-3189710.txt","pid":3189710,"debug":false,"disable-analytics":false}
+{"time":"2025-05-04T17:25:02.471231751+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2025-05-04T17:25:02.472141348+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":33653,"Zone":""}}
+{"time":"2025-05-04T17:25:02.472268534+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3189710}
+{"time":"2025-05-04T17:25:02.658908169+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:54060"}
+{"time":"2025-05-04T17:25:03.374276784+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"0ictlmwf","id":"127.0.0.1:54060"}
+{"time":"2025-05-04T17:25:03.501310319+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"0ictlmwf","id":"127.0.0.1:54060"}
+{"time":"2025-05-05T00:49:32.577676717+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:54060"}
+{"time":"2025-05-05T00:49:32.577829204+03:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-05-05T00:49:32.577794736+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:54060"}
+{"time":"2025-05-05T00:49:32.578007826+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:54060"}
+{"time":"2025-05-05T00:49:33.742564589+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:54060"}
+{"time":"2025-05-05T00:49:33.742592681+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:54060"}
+{"time":"2025-05-05T00:49:33.742613896+03:00","level":"INFO","msg":"server is closed"}
diff --git a/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log b/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log
new file mode 100644
index 0000000000000000000000000000000000000000..8f66fe3c7ae770c0e93c28ce15a95a46c40e21af
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log
@@ -0,0 +1,21 @@
+{"time":"2025-05-04T17:25:03.375857654+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
+{"time":"2025-05-04T17:25:03.375905253+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log"}
+{"time":"2025-05-04T17:25:03.501241143+03:00","level":"INFO","msg":"created new stream","id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501294637+03:00","level":"INFO","msg":"stream: started","id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501448652+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501451145+03:00","level":"INFO","msg":"handler: started","stream_id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.501574427+03:00","level":"INFO","msg":"sender: started","stream_id":"0ictlmwf"}
+{"time":"2025-05-04T17:25:03.865922055+03:00","level":"INFO","msg":"Starting system monitor"}
+{"time":"2025-05-04T22:47:43.191425732+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
+{"time":"2025-05-05T00:01:47.351449692+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
+{"time":"2025-05-05T00:49:32.57779148+03:00","level":"INFO","msg":"stream: closing","id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:32.577842715+03:00","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2025-05-05T00:49:32.578849729+03:00","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2025-05-05T00:49:32.781968337+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
+{"time":"2025-05-05T00:49:32.781997123+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
+{"time":"2025-05-05T00:49:32.782008311+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
+{"time":"2025-05-05T00:49:33.357099059+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-05-05T00:49:33.741524339+03:00","level":"INFO","msg":"handler: closed","stream_id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:33.741583153+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:33.741593811+03:00","level":"INFO","msg":"sender: closed","stream_id":"0ictlmwf"}
+{"time":"2025-05-05T00:49:33.741652369+03:00","level":"INFO","msg":"stream: closed","id":"0ictlmwf"}
diff --git a/wandb/run-20250504_172503-0ictlmwf/logs/debug.log b/wandb/run-20250504_172503-0ictlmwf/logs/debug.log
new file mode 100644
index 0000000000000000000000000000000000000000..627abd37727afa0dddc772a5f08d1d451156833a
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/logs/debug.log
@@ -0,0 +1,27 @@
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Configure stats pid to 3189710
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
+2025-05-04 17:25:03,365 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug.log
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():619] calling init triggers
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
+config: {}
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():669] starting backend
+2025-05-04 17:25:03,366 INFO    MainThread:3189710 [wandb_init.py:init():673] sending inform_init request
+2025-05-04 17:25:03,371 INFO    MainThread:3189710 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2025-05-04 17:25:03,371 INFO    MainThread:3189710 [wandb_init.py:init():686] backend started and connected
+2025-05-04 17:25:03,379 INFO    MainThread:3189710 [wandb_init.py:init():781] updated telemetry
+2025-05-04 17:25:03,382 INFO    MainThread:3189710 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
+2025-05-04 17:25:03,852 INFO    MainThread:3189710 [wandb_init.py:init():867] starting run threads in backend
+2025-05-04 17:25:05,277 INFO    MainThread:3189710 [wandb_run.py:_console_start():2456] atexit reg
+2025-05-04 17:25:05,278 INFO    MainThread:3189710 [wandb_run.py:_redirect():2305] redirect: wrap_raw
+2025-05-04 17:25:05,278 INFO    MainThread:3189710 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-05-04 17:25:05,278 INFO    MainThread:3189710 [wandb_run.py:_redirect():2395] Redirects installed.
+2025-05-04 17:25:05,283 INFO    MainThread:3189710 [wandb_init.py:init():911] run started, returning control to user process
+2025-05-04 17:25:53,069 INFO    MainThread:3189710 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_17-25-43_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 
'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
+2025-05-05 00:49:32,578 WARNING MsgRouterThr:3189710 [router.py:message_loop():75] message_loop has been closed
diff --git a/wandb/run-20250504_172503-0ictlmwf/run-0ictlmwf.wandb b/wandb/run-20250504_172503-0ictlmwf/run-0ictlmwf.wandb
new file mode 100644
index 0000000000000000000000000000000000000000..f8cc6f4ca642c6b7dd3b09e91225198cbf4cc952
--- /dev/null
+++ b/wandb/run-20250504_172503-0ictlmwf/run-0ictlmwf.wandb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:006a1cac0ce47f9249802031630141a9f36a796d14d7c386b77939289e4b498e
+size 17270813