3v324v23 committed
Commit 7681e47 · 1 parent: c3879e3

initial commit

Files changed (3):
  1. README.md +0 -3
  2. config.json +4 -3
  3. model.safetensors +2 -2
README.md DELETED
@@ -1,3 +0,0 @@
- ---
- license: artistic-2.0
- ---
config.json CHANGED
@@ -1,7 +1,7 @@
  {
  "_name_or_path": "HuggingFaceTB/SmolLM2-360M",
  "architectures": [
- "LlamaModel"
+ "LlamaBiModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
@@ -13,6 +13,7 @@
  "intermediate_size": 2560,
  "is_llama_config": true,
  "max_position_embeddings": 8192,
+ "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 15,
  "num_hidden_layers": 32,
@@ -23,8 +24,8 @@
  "rope_scaling": null,
  "rope_theta": 100000,
  "tie_word_embeddings": true,
- "torch_dtype": "bfloat16",
- "transformers_version": "4.40.2",
+ "torch_dtype": "float32",
+ "transformers_version": "4.44.2",
  "use_cache": true,
  "vocab_size": 49152
  }
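Reading the config diff: the architecture string flips from LlamaModel to LlamaBiModel (presumably a bidirectional-attention Llama variant; stock transformers does not register a class by that name), "mlp_bias": false is added, and the pinned dtype moves from bfloat16 to float32, which is what doubles the weight file below. A minimal loading sketch under that config, assuming the standard transformers API; the repo id is taken from "_name_or_path", since this page does not show the actual repository path:

```python
# Minimal sketch, assuming the standard transformers API. The repo id comes
# from "_name_or_path" in the config; the repository this commit belongs to
# is not shown on this page. "LlamaBiModel" is a custom architecture name,
# so loading it as that class would need a local definition or
# trust_remote_code support on the hosting repo.
import torch
from transformers import AutoConfig, AutoModel

config = AutoConfig.from_pretrained("HuggingFaceTB/SmolLM2-360M")
print(config.architectures)  # metadata only; does not pick the class here

# The new config pins "torch_dtype": "float32"; passing torch_dtype mirrors
# that (use torch.bfloat16 to recover the smaller pre-commit footprint).
model = AutoModel.from_pretrained(
    "HuggingFaceTB/SmolLM2-360M",
    torch_dtype=torch.float32,
)
print(model.dtype)
```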
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f25f5987edb5120ee46c009be41b5d0a28c1583e8b591c9e52f7d8e3a6386c5
- size 723673176
+ oid sha256:488bd6902ec21953eb09dc0cc291a38f15e1e30ff8486a6430e7beb7736d555b
+ size 1447315344
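The LFS pointer swap matches the dtype change above: float32 stores 4 bytes per parameter versus bfloat16's 2, so the checkpoint should roughly double, and it does. A quick sanity check, assuming the safetensors file is essentially all weight data:

```python
# Back-of-the-envelope check on the size change (assumption: the file is
# almost entirely weights; the safetensors JSON header does not scale with
# dtype, which accounts for the small residual gap).
old_size = 723_673_176             # bytes, bfloat16 file
approx_params = old_size // 2      # ~361.8M parameters at 2 bytes each
fp32_estimate = approx_params * 4  # 1_447_346_352 bytes
new_size = 1_447_315_344           # bytes, float32 file
print(fp32_estimate - new_size)    # 31_008 bytes: header/metadata difference
```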