Training in progress, step 31000

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,6 +1,7 @@
 ---
 library_name: transformers
 tags:
 - generated_from_trainer
 model-index:
 - name: smallm_140_rope

 ---
 library_name: transformers
 tags:
+- smallm
 - generated_from_trainer
 model-index:
 - name: smallm_140_rope

config.json CHANGED Viewed

@@ -4,10 +4,6 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.1,
-  "auto_map": {
-    "AutoConfig": "config.SmalLmConfig",
-    "AutoModelForCausalLM": "model.SmalLmForCausalLM"
-  },
   "balancing_coef": 0.0001,
   "bos_token_id": 1,
   "embedding_dropout": 0.0,
@@ -43,7 +39,7 @@
   "sliding_window_attention": true,
   "sliding_window_context": 1024,
   "sliding_window_period": 4,
-  "static_residual": true,
   "token_experts": 3,
   "torch_dtype": "float32",
   "transformers_version": "4.50.3",

   ],
   "attention_bias": false,
   "attention_dropout": 0.1,
   "balancing_coef": 0.0001,
   "bos_token_id": 1,
   "embedding_dropout": 0.0,
   "sliding_window_attention": true,
   "sliding_window_context": 1024,
   "sliding_window_period": 4,
+  "static_residual": false,
   "token_experts": 3,
   "torch_dtype": "float32",
   "transformers_version": "4.50.3",

model.py CHANGED Viewed

@@ -635,8 +635,6 @@ class SmalLmModel(SmalLmPreTrainedModel):
         cache_position: Optional[torch.Tensor],
     ):
         if USE_FLASH and inputs_embeds.is_cuda:
-            if attention_mask is None:
-                attention_mask = torch.ones(*inputs_embeds.shape[:2], device=inputs_embeds.device)
             return attention_mask
         dtype, device = inputs_embeds.dtype, inputs_embeds.device
         past_token = (

         cache_position: Optional[torch.Tensor],
     ):
         if USE_FLASH and inputs_embeds.is_cuda:
             return attention_mask
         dtype, device = inputs_embeds.dtype, inputs_embeds.device
         past_token = (

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc13ffa23a1f5210f44d10669aa87f3ec7bfb7a2664786f76ce56132b042639e
 size 563074920

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5bbb33796637d85d181dd86914f0d0b2932daf04a02e2d42b0e675ffd28388a
 size 563074920

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c535587179e528588509a5683a599c692165045d10114ebf77f1f94172c77e9
-size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:37300a576f29a5a8ddf81ea75e13d6c1ee5bf582f11fc6860569d8fcc97499d1
+size 6008