Training in progress, epoch 1

Files changed (5) hide show

README.md CHANGED Viewed

@@ -4,8 +4,8 @@ library_name: transformers
 model_name: sft_best_simplification
 tags:
 - generated_from_trainer
-- trl
 - unsloth
 - sft
 licence: license
 ---
@@ -28,16 +28,16 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_best_simplification/runs/kc6lw27s)
 This model was trained with SFT.
 ### Framework versions
-- TRL: 0.23.0
-- Transformers: 4.57.1
-- Pytorch: 2.8.0
 - Datasets: 4.3.0
 - Tokenizers: 0.22.1

 model_name: sft_best_simplification
 tags:
 - generated_from_trainer
 - unsloth
+- trl
 - sft
 licence: license
 ---
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_best_simplification/runs/155qq06u)
 This model was trained with SFT.
 ### Framework versions
+- TRL: 0.24.0
+- Transformers: 4.57.2
+- Pytorch: 2.9.0
 - Datasets: 4.3.0
 - Tokenizers: 0.22.1

adapter_config.json CHANGED Viewed

@@ -33,13 +33,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
     "q_proj",
-    "down_proj",
-    "v_proj",
-    "k_proj",
     "gate_proj",
-    "o_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
     "up_proj",
     "q_proj",
     "gate_proj",
+    "o_proj",
+    "v_proj",
+    "k_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c191500666a73afc7b36b3ae7277329f9cf35396f5c021a71ed8f7acd8aa3ba
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:72eb424fcda49e1c6989872f6e1285cabdc01a8b195a05e340a9c235ec65bdf9
 size 167832240

tokenizer_config.json CHANGED Viewed

@@ -6179,7 +6179,7 @@
   "legacy": false,
   "model_max_length": 32768,
   "pad_token": "[control_768]",
-  "padding_side": "left",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",

   "legacy": false,
   "model_max_length": 32768,
   "pad_token": "[control_768]",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c40cf0f3fb04a0ff90a766e83b9ee7fcc9800df8b0bf374934d3bf32257ed7ff
 size 6353

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bd370958f23b8bcdae63fbf60cf67ea84485eb6f42a2ac31c8d63a9d834dac5
 size 6353