nixie1981 committed on
Commit
b01a16c
·
verified ·
1 Parent(s): 7ed7540

Upload modeling_conceptframemet.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_conceptframemet.py +15 -3
modeling_conceptframemet.py CHANGED
@@ -50,10 +50,22 @@ class ConceptFrameMetForMetaphorDetection(nn.Module):
50
 
51
  # Load encoder (RoBERTa) with correct type_vocab_size
52
  from transformers import RobertaConfig
53
- encoder_config = RobertaConfig.from_pretrained(encoder_model_name)
54
- encoder_config.type_vocab_size = 4 # CRITICAL: Match training config
55
 
56
- self.encoder = RobertaModel.from_pretrained(encoder_model_name, config=encoder_config)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  self.tokenizer = RobertaTokenizer.from_pretrained(encoder_model_name)
58
  self.config = self.encoder.config
59
 
 
50
 
51
  # Load encoder (RoBERTa) with correct type_vocab_size
52
  from transformers import RobertaConfig
 
 
53
 
54
+ # Load base model first
55
+ self.encoder = RobertaModel.from_pretrained(encoder_model_name)
56
+
57
+ # Resize token_type_embeddings to match training (type_vocab_size=4)
58
+ # This is needed because the model was trained with 4 token types
59
+ if self.encoder.embeddings.token_type_embeddings.weight.shape[0] != 4:
60
+ old_embeddings = self.encoder.embeddings.token_type_embeddings
61
+ new_embeddings = nn.Embedding(4, old_embeddings.embedding_dim)
62
+ # Copy the original embedding (for type 0)
63
+ new_embeddings.weight.data[0] = old_embeddings.weight.data[0]
64
+ # Initialize the rest
65
+ new_embeddings.weight.data[1:].normal_(mean=0.0, std=self.encoder.config.initializer_range)
66
+ self.encoder.embeddings.token_type_embeddings = new_embeddings
67
+ self.encoder.config.type_vocab_size = 4
68
+
69
  self.tokenizer = RobertaTokenizer.from_pretrained(encoder_model_name)
70
  self.config = self.encoder.config
71