mrinaldi commited on
Commit
5f25313
·
verified ·
1 Parent(s): 122c73b

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - matformer
4
+ - custom-model
5
+ library_name: transformers
6
+ ---
7
+
8
+ # Matformer Model
9
+
10
+ This model was trained using the [Matformer](https://github.com/mrinaldi97/matformer) library.
11
+
12
+ ## Model Type
13
+ - **Architecture**: causal
14
+
15
+ ## Installation
16
+
17
+ First, install the required package:
18
+
19
+ ```bash
20
+ pip install git+https://github.com/mrinaldi97/matformer.git
21
+ ```
22
+
23
+ Or, if you have a local clone of the Matformer repository, point the `MATFORMER_ROOT` environment variable at it:
24
+
25
+ ```bash
26
+ export MATFORMER_ROOT=/path/to/matformer
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ import torch
33
+ from transformers import AutoModelForCausalLM, AutoTokenizer
34
+
35
+ # Load model
36
+ model = AutoModelForCausalLM.from_pretrained(
37
+ "mrinaldi/prova001",
38
+ trust_remote_code=True
39
+ )
40
+
41
+ # Generate text
42
+ prompt = "The transformer model is a"
43
+ inputs = model.tokenizer.encode(prompt, add_bos=True, add_eos=False)
44
+ inputs = torch.tensor([inputs], device=model.device)
45
+
46
+ with torch.no_grad():
47
+ outputs = model.generate(inputs, max_new_tokens=50)
48
+
49
+ decoded = model.tokenizer.decode(outputs[0].tolist())
50
+ print(decoded)
51
+ ```
52
+
53
+ ## Citation
54
+
55
+ If you use this model, please cite the Matformer library.
config.json ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_matformer_config_dict": {
3
+ "attention_type": [],
4
+ "bias": false,
5
+ "block_size_for_attention": 128,
6
+ "bos_token_id": 1,
7
+ "compile_flexattn": false,
8
+ "custom_layers": {},
9
+ "decoder": null,
10
+ "default_layer": {
11
+ "attn_impl": "flash",
12
+ "ffn_activation": "swiglu",
13
+ "hooks": {},
14
+ "normalization": "rmsnorm",
15
+ "normalization_position": "post",
16
+ "positional_encoding": "alibi",
17
+ "sliding_window_size": null
18
+ },
19
+ "encoder": null,
20
+ "entropy": null,
21
+ "eos_token_id": 2,
22
+ "ffn_factor": 3.0,
23
+ "has_entropy_model": null,
24
+ "has_text_autoencoder": null,
25
+ "hidden_size": 768,
26
+ "is_causal": true,
27
+ "mask_token_id": null,
28
+ "masked_substitution_rate": null,
29
+ "max_position_embeddings": 1024,
30
+ "model_class": null,
31
+ "name": "BabyLM",
32
+ "num_attention_heads": 12,
33
+ "num_hidden_layers": 12,
34
+ "pad_token_id": 0,
35
+ "rms_norm_eps": 1e-06,
36
+ "rope_theta": 10000.0,
37
+ "sliding_type": null,
38
+ "tie_word_embeddings": false,
39
+ "training_objective": "autoregressive",
40
+ "vocab_size": 32777
41
+ },
42
+ "architectures": [
43
+ "MatformerForCausalLM"
44
+ ],
45
+ "attention_type": [],
46
+ "auto_map": {
47
+ "AutoConfig": "modeling_matformer.MatformerConfig",
48
+ "AutoModel": "modeling_matformer.MatformerModel",
49
+ "AutoModelForCausalLM": "modeling_matformer.MatformerForCausalLM"
50
+ },
51
+ "bias": false,
52
+ "block_size_for_attention": 128,
53
+ "bos_token_id": 1,
54
+ "compile_flexattn": false,
55
+ "custom_layers": {},
56
+ "decoder": null,
57
+ "default_layer": {
58
+ "attn_impl": "flash",
59
+ "ffn_activation": "swiglu",
60
+ "hooks": {},
61
+ "normalization": "rmsnorm",
62
+ "normalization_position": "post",
63
+ "positional_encoding": "alibi",
64
+ "sliding_window_size": null
65
+ },
66
+ "encoder": null,
67
+ "entropy": null,
68
+ "eos_token_id": 2,
69
+ "ffn_factor": 3.0,
70
+ "has_entropy_model": null,
71
+ "has_text_autoencoder": null,
72
+ "hidden_size": 768,
73
+ "is_causal": true,
74
+ "mask_token_id": null,
75
+ "masked_substitution_rate": null,
76
+ "max_position_embeddings": 1024,
77
+ "model_class": null,
78
+ "model_type": "matformer",
79
+ "name": "BabyLM",
80
+ "num_attention_heads": 12,
81
+ "num_hidden_layers": 12,
82
+ "pad_token_id": 0,
83
+ "rms_norm_eps": 1e-06,
84
+ "rope_theta": 10000.0,
85
+ "sliding_type": null,
86
+ "torch_dtype": "bfloat16",
87
+ "training_objective": "autoregressive",
88
+ "transformers_version": "4.53.0.dev0",
89
+ "use_cache": true,
90
+ "vocab_size": 32777
91
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.53.0.dev0"
7
+ }
matformer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_class": "Autoregressive_Model",
3
+ "model_config": {
4
+ "name": "BabyLM",
5
+ "hidden_size": 768,
6
+ "ffn_factor": 3.0,
7
+ "num_hidden_layers": 12,
8
+ "num_attention_heads": 12,
9
+ "vocab_size": 32777,
10
+ "bos_token_id": 1,
11
+ "eos_token_id": 2,
12
+ "pad_token_id": 0,
13
+ "tie_word_embeddings": false,
14
+ "rms_norm_eps": 1e-06,
15
+ "attention_type": [],
16
+ "max_position_embeddings": 1024,
17
+ "block_size_for_attention": 128,
18
+ "rope_theta": 10000.0,
19
+ "compile_flexattn": false,
20
+ "bias": false,
21
+ "training_objective": "autoregressive",
22
+ "is_causal": true,
23
+ "default_layer": {
24
+ "attn_impl": "flash",
25
+ "sliding_window_size": null,
26
+ "positional_encoding": "alibi",
27
+ "normalization": "rmsnorm",
28
+ "normalization_position": "post",
29
+ "ffn_activation": "swiglu",
30
+ "hooks": {}
31
+ },
32
+ "custom_layers": {}
33
+ },
34
+ "training": {
35
+ "optimizer": "muon",
36
+ "lr_scheduling": true,
37
+ "lr": 0.0008,
38
+ "final_lr": 8e-05,
39
+ "hold_steps": 0.013,
40
+ "weight_decay": 0.01,
41
+ "scheduler": "custom",
42
+ "gradient_clip_val": 1.0,
43
+ "warmup_steps": 0.029,
44
+ "max_epochs": 10,
45
+ "accumulate_grad_batches": 5,
46
+ "seed": 27,
47
+ "save_every_n_steps": 8000,
48
+ "checkpoint_name": "Baby_modello_di_prova_28L"
49
+ },
50
+ "tokenizer": {
51
+ "type": "huggingface",
52
+ "pretrained_name": "sapienzanlp/Minerva-350M-base-v1.0",
53
+ "varlen_strategy": "unpadding"
54
+ },
55
+ "data": {
56
+ "data_root": "/home/matteo/BabyLM/mdat_baby_finale/",
57
+ "batch_size": 36,
58
+ "num_workers": 1,
59
+ "mdat_strategy": "MinervaSpecialTokens",
60
+ "mdat_view": null
61
+ },
62
+ "save_dir": "./checkpoints_baby",
63
+ "wandb_project": "BabyLM",
64
+ "wandb_run_name": "Baby-Test-Model-Piccolo"
65
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2db7094c378d191365cd2f8ffdcdf5a745f9c15a63822f1f26cabe14b51915
3
+ size 284831058
modeling_matformer.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modeling_matformer.py
# Auto-generated by Matformer integration for Hugging Face Hub compatibility
#
# Purpose: make the Matformer model classes resolvable by transformers'
# `trust_remote_code=True` auto_map machinery. The real implementation lives
# in the external `matformer` package; this shim only locates it (optionally
# via a local checkout), installs it as a last resort, and registers it.
import os
import sys

# Let a local checkout take precedence over any installed copy:
# MATFORMER_ROOT is expected to point at a clone of the matformer repo.
matformer_root = os.getenv("MATFORMER_ROOT")
if matformer_root:
    matformer_root = os.path.abspath(os.path.expanduser(matformer_root))
    if matformer_root not in sys.path:
        sys.path.insert(0, matformer_root)

try:
    from matformer.modelling_matformer import (
        MatformerForCausalLM,
        MatformerForMaskedLM,
        MatformerForSequenceClassification,
        MatformerModel,
        MatformerConfig,
        register_matformer
    )
    register_matformer()
except ImportError:
    import subprocess

    # NOTE(security): installing from a remote git URL at import time executes
    # arbitrary code from that repository. Users who prefer not to allow this
    # should pre-install matformer or set MATFORMER_ROOT instead.
    print("Matformer not found. Attempting to install from GitHub...")
    try:
        # Fixed: the original wrapped this call in an unused
        # tempfile.TemporaryDirectory() context (the tmpdir was never
        # referenced); the pointless directory creation has been removed.
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            "git+https://github.com/mrinaldi97/matformer.git"
        ])

        # Retry the import now that the package should be available.
        from matformer.modelling_matformer import (
            MatformerForCausalLM,
            MatformerForMaskedLM,
            MatformerForSequenceClassification,
            MatformerModel,
            MatformerConfig,
            register_matformer
        )
        register_matformer()
        print("Successfully installed and imported Matformer!")

    except Exception as install_error:
        # Chain the original failure so both the install error and this
        # actionable message reach the user.
        raise ImportError(
            "Failed to install Matformer automatically. Please install manually:\n"
            "  pip install git+https://github.com/mrinaldi97/matformer.git\n"
            "Or set the MATFORMER_ROOT environment variable to a local clone:\n"
            "  export MATFORMER_ROOT=/path/to/matformer"
        ) from install_error