typos
Browse files- step_finetune.py +3 -3
step_finetune.py
CHANGED
|
@@ -6,7 +6,7 @@ from typing import Optional, List, Callable, Mapping, Any, Union
|
|
| 6 |
import os
|
| 7 |
|
| 8 |
class STEPFinetuningModelConfig(T5Config):
|
| 9 |
-
model_type = "
|
| 10 |
|
| 11 |
def __init__(self,
|
| 12 |
num_examples: int = 512,
|
|
@@ -40,7 +40,7 @@ class STEPFinetuningModel(PreTrainedModel):
|
|
| 40 |
# There are two cases: (1) we initialize the model after STEP-pretraining, i.e. the tunable prefix is not set
|
| 41 |
# and (2) the model has been fine-tuned on downstream data, and hence there is meaningful data in the tunable prefix
|
| 42 |
|
| 43 |
-
# Initialize the prefix with NaNs. If we initialize from STEP-pretraining, this will
|
| 44 |
# if we initialize after fine-tuning, the NaNs will be overwritten anyway.
|
| 45 |
|
| 46 |
self.prefix_embedding = torch.nn.Parameter(torch.nan + torch.zeros((1, self.config.prefix_length, self.config.d_model)))
|
|
@@ -49,7 +49,7 @@ class STEPFinetuningModel(PreTrainedModel):
|
|
| 49 |
def _initialize_prefix(self):
|
| 50 |
prefix_init_tensor = self.prefix_init_tensor
|
| 51 |
if self.config.random_selection:
|
| 52 |
-
# randomize selection of
|
| 53 |
prefix_init_tensor = prefix_init_tensor[torch.randperm(prefix_init_tensor.shape[0]), :, :]
|
| 54 |
|
| 55 |
prefix_init_tensor = prefix_init_tensor[:self.config.num_examples, :self.config.prefix_length,
|
|
|
|
| 6 |
import os
|
| 7 |
|
| 8 |
class STEPFinetuningModelConfig(T5Config):
|
| 9 |
+
model_type = "STEP_finetuning"
|
| 10 |
|
| 11 |
def __init__(self,
|
| 12 |
num_examples: int = 512,
|
|
|
|
| 40 |
# There are two cases: (1) we initialize the model after STEP-pretraining, i.e. the tunable prefix is not set
|
| 41 |
# and (2) the model has been fine-tuned on downstream data, and hence there is meaningful data in the tunable prefix
|
| 42 |
|
| 43 |
+
# Initialize the prefix with NaNs. If we initialize from STEP-pretraining, this will be overwritten by a custom version of from_pretrained
|
| 44 |
# if we initialize after fine-tuning, the NaNs will be overwritten anyway.
|
| 45 |
|
| 46 |
self.prefix_embedding = torch.nn.Parameter(torch.nan + torch.zeros((1, self.config.prefix_length, self.config.d_model)))
|
|
|
|
| 49 |
def _initialize_prefix(self):
|
| 50 |
prefix_init_tensor = self.prefix_init_tensor
|
| 51 |
if self.config.random_selection:
|
| 52 |
+
# randomize selection of edgewise transformations to average for initializing the prefix.
|
| 53 |
prefix_init_tensor = prefix_init_tensor[torch.randperm(prefix_init_tensor.shape[0]), :, :]
|
| 54 |
|
| 55 |
prefix_init_tensor = prefix_init_tensor[:self.config.num_examples, :self.config.prefix_length,
|