ThatHungarian committed on
Commit e5f97ab · verified · 1 Parent(s): 4d4ac74

Added files for quantization

Files changed (2)
  1. aurora_model.py +104 -0
  2. config.json +2 -2
aurora_model.py ADDED
@@ -0,0 +1,104 @@
+ import torch
+ import torch.nn as nn
+ from transformers.models.gpt2.modeling_gpt2 import GPT2PreTrainedModel, GPT2Model
+ from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
+ from transformers import PretrainedConfig
+
+
+ class AuroraGPT2Config(PretrainedConfig):
+     model_type = "aurora-gpt2"
+
+     def __init__(
+         self,
+         vocab_size=25001,
+         n_positions=256,
+         n_embd=256,
+         n_layer=4,
+         n_head=4,
+         activation_function="gelu_new",
+         attn_pdrop=0.1,
+         embd_pdrop=0.1,
+         resid_pdrop=0.1,
+         layer_norm_epsilon=1e-05,
+         bos_token_id=50256,
+         eos_token_id=50256,
+         initializer_range=0.02,
+         use_cache=True,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.vocab_size = vocab_size
+         self.n_positions = n_positions
+         self.max_position_embeddings = n_positions
+         self.n_embd = n_embd
+         self.hidden_size = n_embd  # <-- add this line (fix!)
+         self.n_layer = n_layer
+         self.num_hidden_layers = n_layer  # <-- add alias for HF
+         self.n_head = n_head
+         self.num_attention_heads = n_head  # <-- add alias for HF
+         self.activation_function = activation_function
+         self.attn_pdrop = attn_pdrop
+         self.embd_pdrop = embd_pdrop
+         self.resid_pdrop = resid_pdrop
+         self.layer_norm_epsilon = layer_norm_epsilon
+         self.bos_token_id = bos_token_id
+         self.eos_token_id = eos_token_id
+         self.initializer_range = initializer_range
+         self.use_cache = use_cache
+
+ class AuroraGPT2ForCausalLM(GPT2PreTrainedModel):
+     config_class = AuroraGPT2Config
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.transformer = GPT2Model(config)
+         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+
+         # Tie embeddings and output projection
+         self.transformer.wte.weight = self.lm_head.weight
+
+         # Initialize weights
+         self.post_init()
+
+     def forward(
+         self,
+         input_ids=None,
+         attention_mask=None,
+         labels=None,
+         **kwargs,
+     ):
+         # Run the transformer
+         outputs = self.transformer(
+             input_ids,
+             attention_mask=attention_mask,
+             **kwargs,
+         )
+
+         hidden_states = outputs.last_hidden_state
+         logits = self.lm_head(hidden_states)
+
+         # Loss (optional, only when labels are passed)
+         loss = None
+         if labels is not None:
+             shift_logits = logits[..., :-1, :].contiguous()
+             shift_labels = labels[..., 1:].contiguous()
+             loss_fct = nn.CrossEntropyLoss()
+             loss = loss_fct(
+                 shift_logits.view(-1, shift_logits.size(-1)),
+                 shift_labels.view(-1),
+             )
+
+         return CausalLMOutputWithCrossAttentions(
+             loss=loss,
+             logits=logits,
+             hidden_states=outputs.hidden_states,
+             attentions=outputs.attentions,
+         )
+
+ from transformers import AutoConfig, AutoModelForCausalLM
+
+ # Register Aurora config
+ AutoConfig.register("aurora-gpt2", AuroraGPT2Config)
+
+ # Register Aurora model class
+ AutoModelForCausalLM.register(AuroraGPT2Config, AuroraGPT2ForCausalLM)
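
Because the module registers AuroraGPT2Config and AuroraGPT2ForCausalLM with the Auto classes when it is imported, the checkpoint can then be loaded through the standard transformers entry points. A minimal sketch, not part of the commit, assuming the repository root (updated config.json plus the weight file) is the current working directory:

    # Sketch: importing aurora_model runs the AutoConfig/AutoModelForCausalLM
    # registration at the bottom of the file as a side effect.
    import aurora_model  # noqa: F401

    from transformers import AutoConfig, AutoModelForCausalLM

    # "." is an assumption: the directory containing config.json and the weights.
    config = AutoConfig.from_pretrained(".")
    model = AutoModelForCausalLM.from_pretrained(".", config=config)
    print(type(config).__name__, type(model).__name__)
    # Expected: AuroraGPT2Config AuroraGPT2ForCausalLM
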
config.json CHANGED
@@ -2,7 +2,7 @@
   "_name_or_path": "./small_gpt",
   "activation_function": "gelu_new",
   "architectures": [
-    "GPT2LMHeadModel"
+    "AuroraGPT2ForCausalLM"
   ],
   "attn_pdrop": 0.1,
   "bos_token_id": 50256,
@@ -10,7 +10,7 @@
   "eos_token_id": 50256,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
-  "model_type": "gpt2",
+  "model_type": "aurora-gpt2",
   "n_embd": 256,
   "n_head": 4,
   "n_inner": null,