Training in progress, epoch 1

Files changed (5) hide show

README.md CHANGED Viewed

@@ -4,9 +4,9 @@ library_name: transformers
 model_name: sft_normal_simplification_mini
 tags:
 - generated_from_trainer
-- sft
-- trl
 - unsloth
 licence: license
 ---
@@ -28,7 +28,7 @@ print(output["generated_text"])
 ## Training procedure
 This model was trained with SFT.
@@ -36,7 +36,7 @@ This model was trained with SFT.
 ### Framework versions
 - TRL: 0.24.0
-- Transformers: 4.57.2
 - Pytorch: 2.9.0
 - Datasets: 4.3.0
 - Tokenizers: 0.22.1

 model_name: sft_normal_simplification_mini
 tags:
 - generated_from_trainer
 - unsloth
+- trl
+- sft
 licence: license
 ---
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_normal_simplification_mini/runs/6nfctecu)
 This model was trained with SFT.
 ### Framework versions
 - TRL: 0.24.0
+- Transformers: 4.57.3
 - Pytorch: 2.9.0
 - Datasets: 4.3.0
 - Tokenizers: 0.22.1

adapter_config.json CHANGED Viewed

@@ -33,12 +33,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
     "v_proj",
-    "o_proj",
     "q_proj",
     "up_proj",
     "k_proj",
     "down_proj"
   ],
   "target_parameters": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
     "q_proj",
     "up_proj",
+    "o_proj",
     "k_proj",
+    "gate_proj",
     "down_proj"
   ],
   "target_parameters": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e277fb005297d0c330731b9924f3ddbb9c99c2a576f578b26f09557a1d7634db
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:98849a45d2183ede0484087fabe18552c4b7baef46b044b7b135efe9f6d1b54c
 size 167832240

tokenizer_config.json CHANGED Viewed

@@ -6179,7 +6179,7 @@
   "legacy": false,
   "model_max_length": 32768,
   "pad_token": "[control_768]",
-  "padding_side": "left",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",

   "legacy": false,
   "model_max_length": 32768,
   "pad_token": "[control_768]",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5477b9bf93fb7fdf2fd2c2a74a6b958ac506e68b7f0ab34f1294ae58c89968b
 size 6353

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd9f180dd3b7806d68f7fb492ae4ad60f94a9ae8179eefb0c3aab85d229d445d
 size 6353