Ksgk-fy committed on
Commit
92b8caa
·
verified ·
1 Parent(s): 061ebdb

Fix custom model loading

Browse files
Files changed (1) hide show
  1. modeling_custom_gpt.py +30 -1
modeling_custom_gpt.py CHANGED
@@ -1,4 +1,5 @@
1
  from transformers import PretrainedConfig, GenerationMixin
 
2
 
3
  class CustomGPTConfig(PretrainedConfig):
4
  model_type = "custom_gpt"
@@ -194,8 +195,36 @@ class CustomGPTModel(PreTrainedModel, GenerationMixin):
194
  ignore_index=-100
195
  )
196
 
197
- return {"loss": loss, "logits": logits}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
 
199
 
200
  # Auto-registration for HuggingFace
201
  from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
 
1
  from transformers import PretrainedConfig, GenerationMixin
2
+ from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions
3
 
4
  class CustomGPTConfig(PretrainedConfig):
5
  model_type = "custom_gpt"
 
195
  ignore_index=-100
196
  )
197
 
198
+ return CausalLMOutputWithCrossAttentions(
199
+ loss=loss,
200
+ logits=logits,
201
+ )
202
+
203
def port_weights(custom_state, hf_state, config):
    """
    Map weights from pretrained modgpt into HF CustomGPTModel.

    Starts from a copy of ``hf_state`` so unrelated entries are kept and
    neither input mapping is mutated, then overwrites the embedding,
    LM-head, and per-layer attention/MLP tensors with the values found
    under the original checkpoint's ``transformer.*`` naming scheme.
    Returns the merged state dict.
    """
    ported = hf_state.copy()

    # Top-level tensors: token embeddings and the output projection.
    ported['embed_tokens.weight'] = custom_state['transformer.wte.weight']
    ported['lm_head.weight'] = custom_state['lm_head.weight']

    # Per-layer tensors share the same suffix on both naming schemes,
    # so a single nested loop covers attention Q/K/V/proj and the MLP.
    layer_suffixes = (
        'attn.c_q.weight',
        'attn.c_k.weight',
        'attn.c_v.weight',
        'attn.c_proj.weight',
        'mlp.c_fc.weight',
        'mlp.c_proj.weight',
    )
    for layer_idx in range(config.n_layer):
        for suffix in layer_suffixes:
            source_key = f'transformer.h.{layer_idx}.{suffix}'
            target_key = f'layers.{layer_idx}.{suffix}'
            ported[target_key] = custom_state[source_key]

    return ported
228
 
229
  # Auto-registration for HuggingFace
230
  from transformers import AutoConfig, AutoModel, AutoModelForCausalLM