Training in progress, step 300

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,18 +1,18 @@
 ---
-base_model: Ba2han/test-model-muon-2
 library_name: transformers
 model_name: sft-model-muon-2
 tags:
 - generated_from_trainer
-- unsloth
-- sft
 - trl
 licence: license
 ---
 # Model Card for sft-model-muon-2
-This model is a fine-tuned version of [Ba2han/test-model-muon-2](https://huggingface.co/Ba2han/test-model-muon-2).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -28,18 +28,18 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/eexepqgc)
 This model was trained with SFT.
 ### Framework versions
-- TRL: 0.23.0
-- Transformers: 4.57.1
-- Pytorch: 2.8.0+cu128
-- Datasets: 4.3.0
-- Tokenizers: 0.22.1
 ## Citations

 ---
+base_model: Ba2han/sft-model-muon-3
 library_name: transformers
 model_name: sft-model-muon-2
 tags:
 - generated_from_trainer
 - trl
+- sft
+- unsloth
 licence: license
 ---
 # Model Card for sft-model-muon-2
+This model is a fine-tuned version of [Ba2han/sft-model-muon-3](https://huggingface.co/Ba2han/sft-model-muon-3).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/batuhan409/huggingface/runs/k46455gb)
 This model was trained with SFT.
 ### Framework versions
+- TRL: 0.21.0
+- Transformers: 4.56.1
+- Pytorch: 2.8.0
+- Datasets: 4.2.0
+- Tokenizers: 0.22.0
 ## Citations

config.json CHANGED Viewed

@@ -41,8 +41,8 @@
   "rope_theta": 10000,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "transformers_version": "4.57.1",
-  "unsloth_version": "2025.11.2",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151669

   "rope_theta": 10000,
   "sliding_window": null,
   "tie_word_embeddings": true,
+  "transformers_version": "4.56.1",
+  "unsloth_version": "2025.10.10",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151669

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c800aedee4c0b1a2a05004480998176639a9091bc3f7df936e7aad0d817bc686
 size 1111902928

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bf08d8b8f46a2f5200c2caae04393ee9133d1fb16c6c4790ad8584b9719af36
 size 1111902928

special_tokens_map.json CHANGED Viewed

@@ -21,11 +21,5 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": {
-    "content": "<|vision_pad|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

     "rstrip": false,
     "single_word": false
   },
+  "pad_token": "<|vision_pad|>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d95d57cf87e62ab03a8ba222b951a7b279d29c13c4eee5508ff56607a6aef892
-size 6289

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cc833afd0b2782f22802a5c478f7dd0255e9abfc78e1adf2ab30256956ba061
+size 6225