sergiopaniego HF Staff committed on
Commit
8edb8ad
·
verified ·
1 Parent(s): 2486939

Upload NemotronHForCausalLM

Browse files
Files changed (3) hide show
  1. config.json +4 -4
  2. generation_config.json +5 -8
  3. model.safetensors +2 -2
config.json CHANGED
@@ -12,7 +12,7 @@
12
  "expand": 2,
13
  "head_dim": 128,
14
  "hidden_dropout": 0.0,
15
- "hidden_size": 8,
16
  "initializer_range": 0.02,
17
  "intermediate_size": 32,
18
  "layer_norm_epsilon": 1e-05,
@@ -20,7 +20,7 @@
20
  "mamba",
21
  "attention"
22
  ],
23
- "mamba_head_dim": 2,
24
  "mamba_hidden_act": "silu",
25
  "mamba_num_heads": 4,
26
  "mamba_proj_bias": false,
@@ -52,7 +52,7 @@
52
  "residual_in_fp32": false,
53
  "routed_scaling_factor": 1.0,
54
  "sliding_window": null,
55
- "ssm_state_size": 8,
56
  "tie_word_embeddings": false,
57
  "time_step_floor": 0.0001,
58
  "time_step_limit": [
@@ -64,7 +64,7 @@
64
  "time_step_max": 0.1,
65
  "time_step_min": 0.001,
66
  "topk_group": 1,
67
- "transformers_version": "5.3.0.dev0",
68
  "use_bias": false,
69
  "use_cache": true,
70
  "use_conv_bias": true,
 
12
  "expand": 2,
13
  "head_dim": 128,
14
  "hidden_dropout": 0.0,
15
+ "hidden_size": 16,
16
  "initializer_range": 0.02,
17
  "intermediate_size": 32,
18
  "layer_norm_epsilon": 1e-05,
 
20
  "mamba",
21
  "attention"
22
  ],
23
+ "mamba_head_dim": 8,
24
  "mamba_hidden_act": "silu",
25
  "mamba_num_heads": 4,
26
  "mamba_proj_bias": false,
 
52
  "residual_in_fp32": false,
53
  "routed_scaling_factor": 1.0,
54
  "sliding_window": null,
55
+ "ssm_state_size": 16,
56
  "tie_word_embeddings": false,
57
  "time_step_floor": 0.0001,
58
  "time_step_limit": [
 
64
  "time_step_max": 0.1,
65
  "time_step_min": 0.001,
66
  "topk_group": 1,
67
+ "transformers_version": "5.3.0",
68
  "use_bias": false,
69
  "use_cache": true,
70
  "use_conv_bias": true,
generation_config.json CHANGED
@@ -1,13 +1,10 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
- "do_sample": true,
5
- "eos_token_id": [
6
- 2,
7
- 11
8
- ],
9
  "pad_token_id": 0,
10
- "temperature": 1.0,
11
- "top_p": 1.0,
12
- "transformers_version": "5.3.0.dev0"
13
  }
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "output_attentions": false,
6
+ "output_hidden_states": false,
 
 
7
  "pad_token_id": 0,
8
+ "transformers_version": "5.3.0",
9
+ "use_cache": true
 
10
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:760883f4d4754b5613061c8383e6ccdf7803180914562e26e5e5959f70e748b6
3
- size 4221632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52e48197fa8403975baa3f2426192973fbc4859097b09a8c849eaa92f23e655c
3
+ size 8444544