Fix import error and add source_files to config
Browse files- config.json +1 -7
- foundation_bert.py +3 -7
- yaml_util.py +24 -0
config.json
CHANGED
|
@@ -7,13 +7,7 @@
|
|
| 7 |
"architectures": [
|
| 8 |
"FoundationBert"
|
| 9 |
],
|
| 10 |
-
|
| 11 |
-
"foundation_bert.py",
|
| 12 |
-
"utils/__init__.py",
|
| 13 |
-
"utils/masked_data_modeling_loss.py",
|
| 14 |
-
"utils/yaml_util.py",
|
| 15 |
-
"train_config.yaml"
|
| 16 |
-
],
|
| 17 |
"attention_probs_dropout_prob": 0.1,
|
| 18 |
"classifier_dropout": null,
|
| 19 |
"hidden_act": "gelu",
|
|
|
|
| 7 |
"architectures": [
|
| 8 |
"FoundationBert"
|
| 9 |
],
|
| 10 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"attention_probs_dropout_prob": 0.1,
|
| 12 |
"classifier_dropout": null,
|
| 13 |
"hidden_act": "gelu",
|
foundation_bert.py
CHANGED
|
@@ -2,15 +2,11 @@ import sys
|
|
| 2 |
import os
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
-
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 6 |
-
if current_dir not in sys.path:
|
| 7 |
-
sys.path.append(current_dir)
|
| 8 |
-
|
| 9 |
import torch
|
| 10 |
import yaml
|
| 11 |
-
from
|
| 12 |
# from ..utils.contrastive_loss import ContrastiveLoss
|
| 13 |
-
from
|
| 14 |
from dataclasses import dataclass
|
| 15 |
from transformers import BertModel, BertConfig, PretrainedConfig
|
| 16 |
from typing import Optional, Union
|
|
@@ -128,7 +124,7 @@ class FoundationBert(BertModel):
|
|
| 128 |
|
| 129 |
self.lm_head = torch.nn.Linear(config.hidden_size, config.vocab_size, bias=False) # isn't used currently
|
| 130 |
self.xval_loss = torch.nn.MSELoss(reduction='none') # isn't used currently
|
| 131 |
-
self.mlm_loss = MaskedDataLossWithSoftmax(ignore=-100, reduction='none') # isn't used currently
|
| 132 |
self.distributed_loss = False
|
| 133 |
|
| 134 |
@classmethod
|
|
|
|
| 2 |
import os
|
| 3 |
from pathlib import Path
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
import torch
|
| 6 |
import yaml
|
| 7 |
+
# from masked_data_modeling_loss import MaskedDataLossWithSoftmax
|
| 8 |
# from ..utils.contrastive_loss import ContrastiveLoss
|
| 9 |
+
from yaml_util import MyLoader
|
| 10 |
from dataclasses import dataclass
|
| 11 |
from transformers import BertModel, BertConfig, PretrainedConfig
|
| 12 |
from typing import Optional, Union
|
|
|
|
| 124 |
|
| 125 |
self.lm_head = torch.nn.Linear(config.hidden_size, config.vocab_size, bias=False) # isn't used currently
|
| 126 |
self.xval_loss = torch.nn.MSELoss(reduction='none') # isn't used currently
|
| 127 |
+
#self.mlm_loss = MaskedDataLossWithSoftmax(ignore=-100, reduction='none') # isn't used currently
|
| 128 |
self.distributed_loss = False
|
| 129 |
|
| 130 |
@classmethod
|
yaml_util.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import yaml
|
| 2 |
+
class MyLoader(yaml.SafeLoader):
    """``yaml.SafeLoader`` variant that tolerates documents with unknown tags.

    Constructing any mapping (re)registers ``construct_undefined`` as the
    fallback constructor (tag ``None``), so nodes whose tag has no handler
    are wrapped rather than raising a ``ConstructorError``.
    """

    def construct_mapping(self, *args, **kwargs):
        # Registration must happen before the parent builds the mapping so
        # that unknown-tagged child nodes are routed to the fallback.
        super().add_constructor(None, construct_undefined)
        return super().construct_mapping(*args, **kwargs)
|
| 10 |
+
import typing
|
| 11 |
+
# Pairs an unrecognized YAML tag with its plainly-constructed value, so the
# tag information survives loading. Functional NamedTuple form: same class
# name, fields, repr, and tuple equality as the class-syntax definition.
Tagged = typing.NamedTuple("Tagged", [("tag", str), ("value", object)])
|
| 14 |
+
|
| 15 |
+
def construct_undefined(self, node):
    """Fallback constructor for YAML nodes whose tag has no registered handler.

    Builds the node's plain Python value via the loader's scalar/sequence/
    mapping machinery and wraps it in ``Tagged`` so the original tag string
    is preserved for the caller.

    Args:
        self: the loader instance (installed via ``add_constructor``, so it
            is called with the loader as the first argument).
        node: the ``yaml.nodes.Node`` being constructed.

    Returns:
        Tagged(node.tag, constructed_value)

    Raises:
        TypeError: if the node is not a scalar, sequence, or mapping node.
    """
    if isinstance(node, yaml.nodes.ScalarNode):
        value = self.construct_scalar(node)
    elif isinstance(node, yaml.nodes.SequenceNode):
        value = self.construct_sequence(node)
    elif isinstance(node, yaml.nodes.MappingNode):
        value = self.construct_mapping(node)
    else:
        # A bare `assert False` is stripped under `python -O`, which would let
        # execution fall through with `value` unbound; raise explicitly.
        raise TypeError(f"unexpected node: {node!r}")
    return Tagged(node.tag, value)
|