Training in progress, epoch 1

Files changed (6) hide show

README.md CHANGED Viewed

@@ -4,9 +4,9 @@ library_name: transformers
 model_name: sft_normal_simplification
 tags:
 - generated_from_trainer
-- unsloth
-- trl
 - sft
 licence: license
 ---
@@ -28,18 +28,18 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft-normal_smiplification/runs/rsnevvqb)
 This model was trained with SFT.
 ### Framework versions
-- TRL: 0.18.1
-- Transformers: 4.52.4
-- Pytorch: 2.6.0
-- Datasets: 3.6.0
-- Tokenizers: 0.21.1
 ## Citations

 model_name: sft_normal_simplification
 tags:
 - generated_from_trainer
 - sft
+- trl
+- unsloth
 licence: license
 ---
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/ioakeime-aristotle-university-of-thessaloniki/sft_normal_simplification/runs/b77rp42v)
 This model was trained with SFT.
 ### Framework versions
+- TRL: 0.24.0
+- Transformers: 4.57.2
+- Pytorch: 2.9.0
+- Datasets: 4.3.0
+- Tokenizers: 0.22.1
 ## Citations

adapter_config.json CHANGED Viewed

@@ -1,9 +1,16 @@
 {
   "alpha_pattern": {},
-  "auto_mapping": null,
   "base_model_name_or_path": "unsloth/mistral-7b-v0.3-bnb-4bit",
   "bias": "none",
   "corda_config": null,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
@@ -15,25 +22,29 @@
   "loftq_config": {},
   "lora_alpha": 16,
   "lora_bias": false,
-  "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
-    "q_proj",
     "down_proj",
     "o_proj",
     "v_proj",
     "k_proj",
     "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": false
 }

 {
+  "alora_invocation_tokens": null,
   "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": {
+    "base_model_class": "MistralForCausalLM",
+    "parent_library": "transformers.models.mistral.modeling_mistral",
+    "unsloth_fixed": true
+  },
   "base_model_name_or_path": "unsloth/mistral-7b-v0.3-bnb-4bit",
   "bias": "none",
   "corda_config": null,
+  "ensure_weight_tying": false,
   "eva_config": null,
   "exclude_modules": null,
   "fan_in_fan_out": false,
   "loftq_config": {},
   "lora_alpha": 16,
   "lora_bias": false,
+  "lora_dropout": 0.0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "peft_version": "0.18.0",
+  "qalora_group_size": 16,
   "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
     "down_proj",
     "o_proj",
     "v_proj",
+    "q_proj",
     "k_proj",
     "gate_proj"
   ],
+  "target_parameters": null,
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0aea5a24135ed2e25d95892aac12a4cffabf0e2b3873f2636bcf4a87f65a64bd
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:aea6fd995be4a9d88ef2fe7f54e9eec76f45406d38838aed9c251dd887222a2a
 size 167832240

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 8192,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {
@@ -6969,6 +6964,12 @@
           "id": "A",
           "type_id": 0
         }
       }
     ],
     "pair": [
@@ -6984,6 +6985,12 @@
           "type_id": 0
         }
       },
       {
         "SpecialToken": {
           "id": "<s>",
@@ -6995,9 +7002,24 @@
           "id": "B",
           "type_id": 1
         }
       }
     ],
     "special_tokens": {
       "<s>": {
         "id": "<s>",
         "ids": [

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
           "id": "A",
           "type_id": 0
         }
+      },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 0
+        }
       }
     ],
     "pair": [
           "type_id": 0
         }
       },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 0
+        }
+      },
       {
         "SpecialToken": {
           "id": "<s>",
           "id": "B",
           "type_id": 1
         }
+      },
+      {
+        "SpecialToken": {
+          "id": "</s>",
+          "type_id": 1
+        }
       }
     ],
     "special_tokens": {
+      "</s>": {
+        "id": "</s>",
+        "ids": [
+          2
+        ],
+        "tokens": [
+          "</s>"
+        ]
+      },
       "<s>": {
         "id": "<s>",
         "ids": [

tokenizer_config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "add_bos_token": true,
-  "add_eos_token": false,
   "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {

 {
   "add_bos_token": true,
+  "add_eos_token": true,
   "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3968f59b4bab6b4ce5efa8d174775796faff4737b44f1f81ac66f16f94695ff
-size 5688

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6c34933032ee19e8bb8880b85ca22fa8de6241d3240cc742a31e6c59f0cbe8a
+size 6353