remove redundant function
Browse files
- modeling_youtu.py +0 -24
modeling_youtu.py
CHANGED
|
@@ -414,30 +414,6 @@ class YoutuPreTrainedModel(PreTrainedModel):
|
|
| 414 |
"attentions": YoutuMLAttention,
|
| 415 |
}
|
| 416 |
|
| 417 |
-
def init_weights(self):
|
| 418 |
-
"""
|
| 419 |
-
If needed prunes and maybe initializes weights. If using a custom `PreTrainedModel`, you need to implement any
|
| 420 |
-
initialization logic in `_init_weights`.
|
| 421 |
-
"""
|
| 422 |
-
# Prune heads if needed
|
| 423 |
-
if self.config.pruned_heads:
|
| 424 |
-
self.prune_heads(self.config.pruned_heads)
|
| 425 |
-
|
| 426 |
-
if "-init" in self.name_or_path:
|
| 427 |
-
# Initialize weights
|
| 428 |
-
self.apply(self._initialize_weights)
|
| 429 |
-
|
| 430 |
-
# Adjust weights of o_proj in Attention and down_proj in MLP
|
| 431 |
-
for name, module in self.named_modules():
|
| 432 |
-
if "o_proj" in name or "down_proj" in name:
|
| 433 |
-
# For the output projection, we reinitialize the weights
|
| 434 |
-
scaled_std = self.config.initializer_range * (1.0 / self.config.num_hidden_layers) ** 0.5
|
| 435 |
-
module.weight.data.normal_(mean=0.0, std=scaled_std)
|
| 436 |
-
|
| 437 |
-
# Tie weights should be skipped when not initializing all weights
|
| 438 |
-
# since from_pretrained(...) calls tie weights anyways
|
| 439 |
-
self.tie_weights()
|
| 440 |
-
|
| 441 |
def _init_weights(self, module):
|
| 442 |
super()._init_weights(module)
|
| 443 |
std = self.config.initializer_range
|
|
|
|
| 414 |
"attentions": YoutuMLAttention,
|
| 415 |
}
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
def _init_weights(self, module):
|
| 418 |
super()._init_weights(module)
|
| 419 |
std = self.config.initializer_range
|