Elias-Schwegler committed on
Commit
f519ac8
·
verified ·
1 Parent(s): e220354

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,50 +1,69 @@
1
  {
2
- "architectures": [
3
- "IQuestLoopCoderForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "auto_map": {
8
- "AutoConfig": "configuration_iquestloopcoder.IQuestLoopCoderConfig",
9
- "AutoModel": "modeling_iquestloopcoder.IQuestLoopCoderModel",
10
- "AutoModelForCausalLM": "modeling_iquestloopcoder.IQuestLoopCoderForCausalLM"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  },
12
- "bos_token_id": 1,
13
- "dtype": "bfloat16",
14
- "eos_token_id": [
15
- 2,
16
- 75864,
17
- 75869
18
  ],
19
- "head_dim": 128,
20
- "hidden_act": "silu",
21
- "hidden_size": 5120,
22
- "initializer_range": 0.02,
23
- "intermediate_size": 27648,
24
- "loop_num": 2,
25
- "loop_window_size": 64,
26
- "max_position_embeddings": 131072,
27
- "mlp_bias": false,
28
- "model_type": "iquestloopcoder",
29
- "num_attention_heads": 40,
30
- "num_hidden_layers": 80,
31
- "num_key_value_heads": 8,
32
- "rms_norm_eps": 1e-05,
33
- "rope_scaling": null,
34
- "rope_theta": 500000,
35
- "tie_word_embeddings": false,
36
- "transformers_version": "4.57.3",
37
- "use_cache": true,
38
- "vocab_size": 76800,
39
- "quantization_config": {
40
- "ignore": [
41
- "lm_head"
42
- ],
43
- "quant_algo": "NVFP4",
44
- "producer": {
45
- "name": "modelopt",
46
- "version": "0.40.0"
47
- },
48
- "quant_method": "modelopt"
49
- }
50
  }
 
1
  {
2
+ "architectures": [
3
+ "IQuestLoopCoderForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_iquestloopcoder.IQuestLoopCoderConfig",
9
+ "AutoModel": "modeling_iquestloopcoder.IQuestLoopCoderModel",
10
+ "AutoModelForCausalLM": "modeling_iquestloopcoder.IQuestLoopCoderForCausalLM"
11
+ },
12
+ "bos_token_id": 1,
13
+ "dtype": "bfloat16",
14
+ "eos_token_id": [
15
+ 2,
16
+ 75864,
17
+ 75869
18
+ ],
19
+ "head_dim": 128,
20
+ "hidden_act": "silu",
21
+ "hidden_size": 5120,
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 27648,
24
+ "loop_num": 2,
25
+ "loop_window_size": 64,
26
+ "max_position_embeddings": 131072,
27
+ "mlp_bias": false,
28
+ "model_type": "iquestloopcoder",
29
+ "num_attention_heads": 40,
30
+ "num_hidden_layers": 80,
31
+ "num_key_value_heads": 8,
32
+ "rms_norm_eps": 1e-05,
33
+ "rope_scaling": null,
34
+ "rope_theta": 500000,
35
+ "tie_word_embeddings": false,
36
+ "transformers_version": "4.56.0",
37
+ "use_cache": true,
38
+ "vocab_size": 76800,
39
+ "quantization_config": {
40
+ "config_groups": {
41
+ "group_0": {
42
+ "input_activations": {
43
+ "dynamic": false,
44
+ "num_bits": 4,
45
+ "type": "float",
46
+ "group_size": 16
47
+ },
48
+ "weights": {
49
+ "dynamic": false,
50
+ "num_bits": 4,
51
+ "type": "float",
52
+ "group_size": 16
53
+ },
54
+ "targets": [
55
+ "Linear"
56
+ ]
57
+ }
58
  },
59
+ "ignore": [
60
+ "lm_head"
 
 
 
 
61
  ],
62
+ "quant_algo": "NVFP4",
63
+ "producer": {
64
+ "name": "modelopt",
65
+ "version": "0.41.0"
66
+ },
67
+ "quant_method": "modelopt"
68
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
legacy_files/added_tokens.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 75873,
3
+ "</tool_call>": 75877,
4
+ "</tool_response>": 75879,
5
+ "</tools>": 75875,
6
+ "<CLS>": 75858,
7
+ "<EOD>": 75860,
8
+ "<MASK>": 75861,
9
+ "<PAD>": 75862,
10
+ "<SEP>": 75859,
11
+ "<think>": 75872,
12
+ "<tool_call>": 75876,
13
+ "<tool_response>": 75878,
14
+ "<tools>": 75874,
15
+ "<|CLS|>": 75880,
16
+ "<|EOD|>": 75882,
17
+ "<|MASK|>": 75883,
18
+ "<|PAD|>": 75884,
19
+ "<|SEP|>": 75881,
20
+ "<|endoftext|>": 75869,
21
+ "<|file_sep|>": 75871,
22
+ "<|fim_middle|>": 75866,
23
+ "<|fim_pad|>": 75868,
24
+ "<|fim_prefix|>": 75865,
25
+ "<|fim_suffix|>": 75867,
26
+ "<|im_end|>": 75864,
27
+ "<|im_start|>": 75863,
28
+ "<|repo_name|>": 75870
29
+ }
legacy_files/hf_quant_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "producer": {
3
+ "name": "modelopt",
4
+ "version": "0.41.0"
5
+ },
6
+ "quantization": {
7
+ "quant_algo": "NVFP4",
8
+ "kv_cache_quant_algo": null,
9
+ "group_size": 16,
10
+ "exclude_modules": [
11
+ "lm_head"
12
+ ]
13
+ }
14
+ }
legacy_files/special_tokens_map.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|CLS|>",
4
+ "<|SEP|>",
5
+ "<|EOD|>",
6
+ "<|MASK|>",
7
+ "<|PAD|>",
8
+ "<|fim_prefix|>",
9
+ "<|fim_middle|>",
10
+ "<|fim_suffix|>",
11
+ "<|im_start|>",
12
+ "<|im_end|>",
13
+ "<|fim_pad|>",
14
+ "<|endoftext|>",
15
+ "<|repo_name|>",
16
+ "<|file_sep|>",
17
+ "<think>",
18
+ "</think>"
19
+ ],
20
+ "bos_token": {
21
+ "content": "<s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "eos_token": {
28
+ "content": "<|im_end|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "pad_token": {
35
+ "content": "<|endoftext|>",
36
+ "lstrip": false,
37
+ "normalized": false,
38
+ "rstrip": false,
39
+ "single_word": false
40
+ },
41
+ "unk_token": {
42
+ "content": "<unk>",
43
+ "lstrip": false,
44
+ "normalized": true,
45
+ "rstrip": false,
46
+ "single_word": true
47
+ }
48
+ }
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b5af107a20f8e0e0bdae5c45c579a13dc2f659aaacb0ca389ab3524f566935
3
- size 4937199704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb421765450b1973baefbf307181772772e106240e2720dcccc182e764c4c18
3
+ size 4936202752
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9010c28d8b6e9f46384072a54e45eb87ee3d70dc7dddda5f091f46b52a88fa79
3
- size 4938439016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53393123f0b2a92ad6a39c8799f9af1cb3cc68bddb8bcc5f9f11e479cf2def73
3
+ size 4937255232
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:318b778d7802aea8a05427a195a948a3292896d4c6c25ceeb9b1f03b21d9329f
3
- size 4938439016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8488ee175bf2ebe7e0fa6529a62be49c38834f7127c680c3ce188d399bebb8ac
3
+ size 4937255232
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1edc5eba6f5dc7b5b0cd993b3a4b1e8df5524a44ff7b82919d8a0fe1982c03d4
3
- size 4938439016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49276ddde39b3f45328cfd845fc8547fbfcc76cd615fd917ea4d08e5e6148ca0
3
+ size 4937255232
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:783ed16140e9f0e19c10819b71bb032391da2d72e10c750cdfae6d7d9f6ed523
3
- size 3769818152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f2cb2b12e1631542403147c084c7792d0622cfbec968df1319e540baa80ce9c
3
+ size 3769105000
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
modeling_iquestloopcoder.py CHANGED
@@ -884,7 +884,7 @@ class IQuestLoopCoderModel(IQuestLoopCoderPreTrainedModel):
884
  # Initialize weights and apply final processing
885
  self.post_init()
886
 
887
- @check_model_inputs()
888
  @auto_docstring
889
  def forward(
890
  self,
 
884
  # Initialize weights and apply final processing
885
  self.post_init()
886
 
887
+ @check_model_inputs
888
  @auto_docstring
889
  def forward(
890
  self,