Training in progress, epoch 1

Files changed (6) hide show

README.md CHANGED Viewed

@@ -34,11 +34,11 @@ This model was trained with SFT.
 ### Framework versions
-- TRL: 0.21.0
-- Transformers: 4.55.4
-- Pytorch: 2.8.0+cu126
 - Datasets: 4.0.0
-- Tokenizers: 0.21.4
 ## Citations

 ### Framework versions
+- TRL: 0.25.1
+- Transformers: 4.57.2
+- Pytorch: 2.9.0+cu126
 - Datasets: 4.0.0
+- Tokenizers: 0.22.1
 ## Citations

config.json CHANGED Viewed

@@ -7,6 +7,7 @@
   "attention_dropout": 0.0,
   "attn_logit_softcapping": null,
   "bos_token_id": 2,
   "eos_token_id": 1,
   "final_logit_softcapping": null,
   "head_dim": 256,
@@ -46,8 +47,7 @@
   "rope_scaling": null,
   "rope_theta": 1000000.0,
   "sliding_window": 512,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.4",
   "use_bidirectional_attention": false,
   "use_cache": true,
   "vocab_size": 262144

   "attention_dropout": 0.0,
   "attn_logit_softcapping": null,
   "bos_token_id": 2,
+  "dtype": "bfloat16",
   "eos_token_id": 1,
   "final_logit_softcapping": null,
   "head_dim": 256,
   "rope_scaling": null,
   "rope_theta": 1000000.0,
   "sliding_window": 512,
+  "transformers_version": "4.57.2",
   "use_bidirectional_attention": false,
   "use_cache": true,
   "vocab_size": 262144

generation_config.json CHANGED Viewed

@@ -1,11 +1,13 @@
 {
   "cache_implementation": "hybrid",
   "do_sample": true,
   "eos_token_id": [
     1,
     106
   ],
   "top_k": 64,
   "top_p": 0.95,
-  "transformers_version": "4.55.4"
 }

 {
+  "bos_token_id": 2,
   "cache_implementation": "hybrid",
   "do_sample": true,
   "eos_token_id": [
     1,
     106
   ],
+  "pad_token_id": 0,
   "top_k": 64,
   "top_p": 0.95,
+  "transformers_version": "4.57.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b10557428c98c0639986a03e71013efb7cdb38b40983d9048aa485cef784e3c
 size 536223056

 version https://git-lfs.github.com/spec/v1
+oid sha256:7833586c1e7e2a41ba630e6ae930f0e240e9a8b4f9c22c90fc08f36531e23619
 size 536223056

runs/Dec02_22-38-04_ad0a00be3b2f/events.out.tfevents.1764715119.ad0a00be3b2f.205.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9814c132cd3dfdbee8d0f41771df23acaf44f851cda3ebdb26ca741b7c92a3e
+size 10912

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39e2df62bec9ac052e24b6a636a9c7182f315a1841a66abe01375ff744b7fbfc
-size 6225

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab3022985b93f793c268fe569b03e8210c73a3923859d58a4f6a73503cf15af0
+size 6289