Goekdeniz-Guelmez committed (verified)
Commit ba05c4f · 1 parent: 98735f0

Upload 13 files

Files changed (4)
  1. config.used.json +50 -0
  2. create.py +21 -1
  3. model.safetensors +2 -2
  4. modeling_minimax.py +1 -1
config.used.json ADDED
@@ -0,0 +1,50 @@
+{
+  "architectures": [
+    "MiniMaxForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "layer_types": [
+    "linear_attention",
+    "full_attention",
+    "linear_attention",
+    "full_attention"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_minimax.MiniMaxConfig",
+    "AutoModelForCausalLM": "modeling_minimax.MiniMaxForCausalLM"
+  },
+  "bos_token_id": null,
+  "eos_token_id": 200020,
+  "head_dim": 32,
+  "hidden_act": "silu",
+  "hidden_size": 256,
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "layernorm_full_attention_alpha": 3.5565588200778455,
+  "layernorm_full_attention_beta": 1.0,
+  "layernorm_linear_attention_alpha": 3.5565588200778455,
+  "layernorm_linear_attention_beta": 1.0,
+  "layernorm_mlp_alpha": 3.5565588200778455,
+  "layernorm_mlp_beta": 1.0,
+  "max_position_embeddings": 1024,
+  "model_type": "minimax",
+  "num_attention_heads": 4,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 4,
+  "num_key_value_heads": 2,
+  "num_local_experts": 2,
+  "output_router_logits": false,
+  "postnorm": true,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 10000,
+  "rotary_dim": 16,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "shared_intermediate_size": 0,
+  "shared_moe_mode": "sigmoid",
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.45.2",
+  "use_cache": true,
+  "vocab_size": 200064
+}
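
This config describes a deliberately tiny MiniMax variant: 4 hidden layers alternating linear and full attention, hidden size 256, and a 2-expert MoE routing 1 expert per token. Because the auto_map entries point at the repo's own configuration_minimax.py and modeling_minimax.py, loading it through transformers requires trust_remote_code=True. A minimal sketch (the local path is a placeholder, not a path from this commit):

    from transformers import AutoConfig

    # Load the custom config; trust_remote_code lets transformers import
    # configuration_minimax.MiniMaxConfig via the auto_map entries above.
    config = AutoConfig.from_pretrained(
        "path/to/MiniMax01Text-Dev",  # hypothetical local checkout of this repo
        trust_remote_code=True,
    )

    # Inspect the per-layer attention layout declared in the config.
    for i, kind in enumerate(config.layer_types):
        print(f"layer {i}: {kind}")  # alternates linear_attention / full_attention
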
create.py CHANGED
@@ -2,6 +2,7 @@ import sys
 import os
 from safetensors.torch import save_file
 import json
+import torch
 
 # Add the directory containing your modeling.py and configuration.py to the Python path
 model_dir = "/Users/Goekdeniz.Guelmez@computacenter.com/Library/CloudStorage/OneDrive-COMPUTACENTER/Desktop/MiniMax01Text-Dev"
@@ -19,9 +20,21 @@ with open(config_path, 'r') as f:
 # Create the configuration object
 config = MiniMaxConfig(**config_dict)
 
+# Print attention layout info
+if getattr(config, "linear_attention", False):
+    print("Using linear attention layout from config.")
+else:
+    print("Using full attention layout from config.")
+
+# Set random seed for reproducibility
+torch.manual_seed(42)
+
 # Create the model
 small_model = MiniMaxForCausalLM(config)
 
+# Set model to evaluation mode
+small_model.eval()
+
 # Print parameter count to verify
 param_count = sum(p.numel() for p in small_model.parameters())
 print(f"Model has {param_count:,} parameters")
@@ -29,7 +42,14 @@ print(f"Model has {param_count:,} parameters")
 # Convert model to state dict
 model_state_dict = small_model.state_dict()
 
+# Save the config used for reproducibility
+used_config_path = os.path.join(model_dir, "config.used.json")
+with open(used_config_path, 'w') as f:
+    json.dump(config_dict, f, indent=2)
+
 # Save as safetensors
 save_file(model_state_dict, os.path.join(model_dir, "model.safetensors"))
 
-print("Model saved in safetensors format")
+print("Model saved in safetensors format")
+
+print(small_model)
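
One way to sanity-check the artifact that create.py writes is to load the safetensors file back and recount parameters; the total should match the script's own printout. A minimal sketch, assuming a local copy of the same model_dir:

    import os
    from safetensors.torch import load_file

    model_dir = "path/to/MiniMax01Text-Dev"  # hypothetical local checkout

    # Load the serialized state dict back from disk.
    state_dict = load_file(os.path.join(model_dir, "model.safetensors"))

    # Recompute the parameter count from the saved tensors.
    total = sum(t.numel() for t in state_dict.values())
    print(f"Checkpoint holds {total:,} parameters across {len(state_dict)} tensors")
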
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c7797546b0e092d6b3236d1f9826af42bfe293592590e2032aafe77ba8592a4
-size 423910680
+oid sha256:a70cdf1018eab074427a9a85060958141f22c3346cd377869c8c13ff437a26ed
+size 426550448
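
The file tracked in git is only a Git LFS pointer; the oid line is the SHA-256 of the actual blob. After downloading the real weights, the hash can be checked against the pointer with the standard library. A sketch (the file path is a placeholder):

    import hashlib

    EXPECTED_OID = "a70cdf1018eab074427a9a85060958141f22c3346cd377869c8c13ff437a26ed"

    # Hash the downloaded blob in chunks to avoid loading ~426 MB at once.
    h = hashlib.sha256()
    with open("model.safetensors", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)

    assert h.hexdigest() == EXPECTED_OID, "blob does not match the LFS pointer"
    print("model.safetensors matches the LFS pointer oid")
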
modeling_minimax.py CHANGED
@@ -44,7 +44,7 @@ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.processing_utils import Unpack
 from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple
 from transformers.utils.generic import OutputRecorder, check_model_inputs
-from .configuration_minimax import MiniMaxConfig
+from configuration_minimax import MiniMaxConfig
 
 
 @use_kernel_forward_from_hub("RMSNorm")