gbyuvd committed (verified)
Commit 5fa06f8 · Parent(s): 0bcda87

Upload 7 files

Files changed (3)
  1. config.json +51 -48
  2. model.safetensors +2 -2
  3. training_args.bin +1 -1
config.json CHANGED
@@ -1,48 +1,51 @@
-{
-  "model_type": "chemq3_mtp",
-  "architectures": [
-    "ChemQ3MTPForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.1,
-  "bos_token_id": 0,
-  "dtype": "float32",
-  "eos_token_id": 1,
-  "head_dim": 64,
-  "hidden_act": "silu",
-  "hidden_size": 320,
-  "initializer_range": 0.02,
-  "intermediate_size": 1280,
-  "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention"
-  ],
-  "max_position_embeddings": 128,
-  "max_window_layers": 28,
-  "num_attention_heads": 4,
-  "num_hidden_layers": 6,
-  "num_key_value_heads": 2,
-  "pad_token_id": 2,
-  "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "sliding_window": null,
-  "tie_word_embeddings": true,
-  "transformers_version": "4.56.1",
-  "use_cache": false,
-  "use_sliding_window": false,
-  "vocab_size": 782,
-  "num_future_tokens": 3,
-  "use_mtp_training": true,
-  "horizon_weights": [1.0, 0.9, 0.81],
-  "entropy_controller_config": {
-    "min_entropy": 0.5,
-    "max_entropy": 3.0,
-    "target_entropy": 1.5,
-    "adaptation_rate": 0.01
-  }
-}
+{
+  "architectures": [
+    "ChemQ3MTPForCausalLM"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "dtype": "float32",
+  "entropy_controller_config": {
+    "adaptation_rate": 0.01,
+    "max_entropy": 3.0,
+    "min_entropy": 0.5,
+    "target_entropy": 1.5
+  },
+  "eos_token_id": 1,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 320,
+  "horizon_weights": [
+    1.0,
+    0.9,
+    0.81
+  ],
+  "initializer_range": 0.02,
+  "intermediate_size": 1280,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 128,
+  "max_window_layers": 28,
+  "model_type": "chemq3_mtp",
+  "num_attention_heads": 4,
+  "num_future_tokens": 3,
+  "num_hidden_layers": 6,
+  "num_key_value_heads": 2,
+  "pad_token_id": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.56.1",
+  "use_cache": false,
+  "use_mtp_training": true,
+  "use_sliding_window": false,
+  "vocab_size": 782
+}
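
The rewritten config.json re-sorts its keys alphabetically and drops the explicit "attention_bias": false entry; the substantive values are unchanged. The alphabetical ordering matches what transformers' save_pretrained emits (configs are serialized with sort_keys=True), which suggests the file was regenerated by the library rather than edited by hand. The multi-token-prediction fields sit on top of an otherwise Qwen3-style causal-LM config: "num_future_tokens": 3, "horizon_weights" decaying geometrically as 0.9^k, and an entropy controller with bounds and an adaptation rate. A minimal sketch of reading these fields back, assuming the repo ships the custom chemq3_mtp configuration class for remote-code loading (the repo id below is a placeholder, not confirmed by this commit):

from transformers import AutoConfig

# "chemq3_mtp" is not a built-in model_type, so loading relies on the
# configuration code shipped in the repo (trust_remote_code executes it).
config = AutoConfig.from_pretrained(
    "gbyuvd/chemq3-mtp",   # placeholder repo id, for illustration only
    trust_remote_code=True,
)

# MTP-specific fields added on top of the base causal-LM settings:
print(config.num_future_tokens)          # 3 future tokens per position
print(config.horizon_weights)            # [1.0, 0.9, 0.81], i.e. 0.9**k
print(config.entropy_controller_config)  # entropy bounds, target, rate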
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35079ed16162d885c701f9e05234933a1fed32f4a2ff5a1218d80de95838523d
-size 39427428
+oid sha256:efc47ed13d40f0ae6949d286a9da8de10d788c7868c55af11f50c2ff4eae8e18
+size 39437252
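
model.safetensors is tracked with Git LFS, so the diff only touches the pointer file: oid is the SHA-256 of the actual payload and size its length in bytes (39,427,428 → 39,437,252 here, consistent with a slightly larger checkpoint). A short sketch, assuming the real weights have been downloaded locally, for verifying a file against the pointer's oid:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream-hash a file so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

# Compare against the oid recorded in the LFS pointer above.
expected = "efc47ed13d40f0ae6949d286a9da8de10d788c7868c55af11f50c2ff4eae8e18"
assert sha256_of("model.safetensors") == expected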
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:426475db7e68e7905ebadd505f46b81c0da444386a36fba5e9404dc848b3bf00
+oid sha256:1c61f63685fe07f214a969ac255f140d860b1bc3b7d8b23a075a494fdc4c4d63
 size 5368
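
training_args.bin changes hash while keeping the same 5,368-byte size, so only the content of the serialized training arguments differs. transformers' Trainer writes this file with torch.save, i.e. it is a pickle of a TrainingArguments object; a hedged sketch for inspecting it offline (unpickling executes code, so only load files from a source you trust):

import torch
import transformers  # the pickle references transformers.TrainingArguments

# Recent torch versions default to weights_only=True, which rejects
# arbitrary pickled objects; the explicit opt-out is required here and
# is only appropriate because the file comes from a trusted repo.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)  # expected: TrainingArguments
print(args.learning_rate, args.num_train_epochs)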