np-cr
/

testing-gpt-oss

@@ -13,28 +13,6 @@
   "initializer_range": 0.02,
   "intermediate_size": 16,
   "layer_types": [
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "full_attention",
     "sliding_attention",
     "full_attention"
   ],
@@ -62,7 +40,7 @@
   "swiglu_limit": 7.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.4",
   "use_cache": true,
   "vocab_size": 201088
 }

   "initializer_range": 0.02,
   "intermediate_size": 16,
   "layer_types": [
     "sliding_attention",
     "full_attention"
   ],
   "swiglu_limit": 7.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.57.6",
   "use_cache": true,
   "vocab_size": 201088
 }