Fix custom model loading
Browse files- modeling_custom_gpt.py +30 -1
modeling_custom_gpt.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from transformers import PretrainedConfig, GenerationMixin
|
|
|
|
| 2 |
|
| 3 |
class CustomGPTConfig(PretrainedConfig):
|
| 4 |
model_type = "custom_gpt"
|
|
@@ -194,8 +195,36 @@ class CustomGPTModel(PreTrainedModel, GenerationMixin):
|
|
| 194 |
ignore_index=-100
|
| 195 |
)
|
| 196 |
|
| 197 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
|
|
|
| 199 |
|
| 200 |
# Auto-registration for HuggingFace
|
| 201 |
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
|
|
|
|
| 1 |
from transformers import PretrainedConfig, GenerationMixin
|
| 2 |
+
from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
|
| 3 |
|
| 4 |
class CustomGPTConfig(PretrainedConfig):
|
| 5 |
model_type = "custom_gpt"
|
|
|
|
| 195 |
ignore_index=-100
|
| 196 |
)
|
| 197 |
|
| 198 |
+
return CausalLMOutputWithCrossAttentions(
|
| 199 |
+
loss=loss,
|
| 200 |
+
logits=logits,
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
def port_weights(custom_state, hf_state, config):
    """
    Map weights from pretrained modgpt into HF CustomGPTModel.

    Args:
        custom_state: state dict of the original model, keys prefixed
            with ``transformer.`` (e.g. ``transformer.h.{i}.attn.c_q.weight``).
        hf_state: state dict of the target HF model; used as the template
            and copied, never mutated in place.
        config: model config; only ``n_layer`` is read here.

    Returns:
        A copy of ``hf_state`` with the embedding, LM head, and all
        per-layer attention/MLP weights overwritten from ``custom_state``.
    """
    ported = hf_state.copy()

    # Token embedding and output projection map one-to-one.
    ported['embed_tokens.weight'] = custom_state['transformer.wte.weight']
    ported['lm_head.weight'] = custom_state['lm_head.weight']

    # Per-layer tensors share the same suffix on both sides; only the
    # prefix differs ('layers.{i}.' vs 'transformer.h.{i}.').
    layer_suffixes = (
        'attn.c_q.weight',
        'attn.c_k.weight',
        'attn.c_v.weight',
        'attn.c_proj.weight',
        'mlp.c_fc.weight',
        'mlp.c_proj.weight',
    )
    for layer in range(config.n_layer):
        for suffix in layer_suffixes:
            ported[f'layers.{layer}.{suffix}'] = (
                custom_state[f'transformer.h.{layer}.{suffix}']
            )

    return ported
|
| 228 |
|
| 229 |
# Auto-registration for HuggingFace
|
| 230 |
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
|