jnjj committed on
Commit
faa949b
·
verified ·
1 Parent(s): 537d386

Upload model state (Full) via LLM Platform

Browse files

Saved from LLM Platform UI. Model class: LlamaForCausalLM. State: Full Model.

Files changed (2) hide show
  1. config.json +7 -6
  2. model.safetensors +2 -2
config.json CHANGED
@@ -10,8 +10,8 @@
10
  ],
11
  "attention_bias": false,
12
  "attention_dropout": 0.0,
13
- "attention_variant": "flash_attention_2",
14
- "attn_implementation": "flash_attention_2",
15
  "auto_optimization_enabled": false,
16
  "baseline_distribution": null,
17
  "bitnet_applied": false,
@@ -89,7 +89,7 @@
89
  "num_hidden_layers": 1,
90
  "num_key_value_heads": 1,
91
  "off_topic_filter": false,
92
- "optimizer": "adamw",
93
  "original_num_layers": 1,
94
  "pad_token_id": 2,
95
  "peft_adapter_added": false,
@@ -102,7 +102,7 @@
102
  "profanity_filter": false,
103
  "pruning_amount": null,
104
  "pruning_applied": false,
105
- "qa_restrictions_removed": false,
106
  "qat_applied": false,
107
  "quantization_applied": false,
108
  "quantization_mode": "float32",
@@ -120,6 +120,7 @@
120
  "rms_norm_eps": 1e-05,
121
  "rope_scaling": {
122
  "factor": 2.0,
 
123
  "type": "linear"
124
  },
125
  "rope_scaling_factor": 2.0,
@@ -144,9 +145,9 @@
144
  "toxicity_filter": false,
145
  "transformers_version": "4.50.0.dev0",
146
  "ultra_fast_mode": false,
147
- "untied_embeddings": false,
148
  "use_cache": false,
149
- "use_flash_attention_2": true,
150
  "vocab_size": 32002,
151
  "weight_decay_disabled": false,
152
  "weight_init_applied": false
 
10
  ],
11
  "attention_bias": false,
12
  "attention_dropout": 0.0,
13
+ "attention_variant": "auto",
14
+ "attn_implementation": "auto",
15
  "auto_optimization_enabled": false,
16
  "baseline_distribution": null,
17
  "bitnet_applied": false,
 
89
  "num_hidden_layers": 1,
90
  "num_key_value_heads": 1,
91
  "off_topic_filter": false,
92
+ "optimizer": "adamw_torch",
93
  "original_num_layers": 1,
94
  "pad_token_id": 2,
95
  "peft_adapter_added": false,
 
102
  "profanity_filter": false,
103
  "pruning_amount": null,
104
  "pruning_applied": false,
105
+ "qa_restrictions_removed": true,
106
  "qat_applied": false,
107
  "quantization_applied": false,
108
  "quantization_mode": "float32",
 
120
  "rms_norm_eps": 1e-05,
121
  "rope_scaling": {
122
  "factor": 2.0,
123
+ "rope_type": "linear",
124
  "type": "linear"
125
  },
126
  "rope_scaling_factor": 2.0,
 
145
  "toxicity_filter": false,
146
  "transformers_version": "4.50.0.dev0",
147
  "ultra_fast_mode": false,
148
+ "untied_embeddings": true,
149
  "use_cache": false,
150
+ "use_flash_attention_2": false,
151
  "vocab_size": 32002,
152
  "weight_decay_disabled": false,
153
  "weight_init_applied": false
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9637659163cf84d104e98357e29e54677569a502a6ec22688980701374af790b
3
- size 688936172
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aaaa407759ba85a4710ddadec26cb41ccae754db050b26726ca444378a256d0
3
+ size 51960328