add egt model
Browse files- egt_model/__init__.py +57 -0
- egt_model/collating_egt.py +103 -0
- egt_model/configuration_egt.py +115 -0
- egt_model/modeling_egt.py +256 -0
- share_model.py +15 -0
egt_model/__init__.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from transformers.utils import OptionalDependencyNotAvailable, _LazyModule, is_tokenizers_available, is_torch_available


# Lazy-import table: the configuration is always importable; the modeling
# module is added below only when torch is available.
_import_structure = {
    "configuration_egt": ["EGT_PRETRAINED_CONFIG_ARCHIVE_MAP", "EGTConfig"],
}

try:
    if not is_torch_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    pass
else:
    # torch is present, so the model classes can be exposed as well.
    _import_structure["modeling_egt"] = [
        "EGT_PRETRAINED_MODEL_ARCHIVE_LIST",
        "EGTForGraphClassification",
        "EGTModel",
        "EGTPreTrainedModel",
    ]


if TYPE_CHECKING:
    # Static type checkers see the real imports so the names resolve.
    from .configuration_egt import EGT_PRETRAINED_CONFIG_ARCHIVE_MAP, EGTConfig

    try:
        if not is_torch_available():
            raise OptionalDependencyNotAvailable()
    except OptionalDependencyNotAvailable:
        pass
    else:
        from .modeling_egt import (
            EGT_PRETRAINED_MODEL_ARCHIVE_LIST,
            EGTForGraphClassification,
            EGTModel,
            EGTPreTrainedModel,
        )


else:
    import sys

    # At runtime, replace this module with a proxy that imports each submodule
    # on first attribute access (standard transformers lazy-loading pattern).
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
egt_model/collating_egt.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, List, Mapping
|
| 2 |
+
|
| 3 |
+
import dgl
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def convert_to_single_node_emb(x, offset: int = 128):
    """Shift each node-feature column into its own disjoint embedding range.

    Column ``j`` is mapped to ``1 + j * offset + value`` so that all columns can
    share a single embedding table while index 0 stays reserved for padding.
    """
    num_features = x.shape[1] if x.ndim > 1 else 1
    column_shifts = 1 + np.arange(0, num_features * offset, offset, dtype=np.int64)
    return x + column_shifts
|
| 15 |
+
def convert_to_single_edge_emb(x, offset: int = 8):
    """Shift each edge-feature column into its own disjoint embedding range.

    Same scheme as ``convert_to_single_node_emb`` but with a smaller per-column
    offset, matching the smaller edge-feature vocabulary.
    """
    num_features = x.shape[1] if x.ndim > 1 else 1
    column_shifts = 1 + np.arange(0, num_features * offset, offset, dtype=np.int64)
    return x + column_shifts
| 21 |
+
|
| 22 |
+
def preprocess_item(item, keep_features=True):
    """Precompute EGT model inputs for a single graph dict.

    Args:
        item: Mapping with "edge_index" (2, num_edges) and "num_nodes", and
            optionally "edge_attr", "node_feat" and "y".
        keep_features: If False, ignore any provided node/edge features and use
            a constant feature (same embedding for every node/edge).

    Returns:
        The same mapping, augmented with "input_nodes", "attn_edge_type",
        "spatial_pos", "svd_pe" and "labels".
    """
    if keep_features and "edge_attr" in item.keys():  # edge_attr
        edge_attr = np.asarray(item["edge_attr"], dtype=np.int64)
    else:
        edge_attr = np.ones((len(item["edge_index"][0]), 1), dtype=np.int64)  # same embedding for all

    if keep_features and "node_feat" in item.keys():  # input_nodes
        node_feature = np.asarray(item["node_feat"], dtype=np.int64)
    else:
        node_feature = np.ones((item["num_nodes"], 1), dtype=np.int64)  # same embedding for all

    edge_index = np.asarray(item["edge_index"], dtype=np.int64)

    input_nodes = convert_to_single_node_emb(node_feature)
    num_nodes = item["num_nodes"]

    if len(edge_attr.shape) == 1:
        edge_attr = edge_attr[:, None]
    attn_edge_type = np.zeros([num_nodes, num_nodes, edge_attr.shape[-1]], dtype=np.int64)
    attn_edge_type[edge_index[0], edge_index[1]] = convert_to_single_edge_emb(edge_attr)

    # convert to dgl graph for computing shortest path distance and svd encodings
    # BUG FIX: pass num_nodes explicitly. Without it dgl.graph infers the node
    # count from the largest endpoint index, so graphs whose trailing nodes are
    # isolated get undersized spatial_pos/svd_pe matrices that no longer line up
    # with input_nodes (and with the collator's attn_mask, keyed on svd_pe).
    g = dgl.graph((edge_index[0], edge_index[1]), num_nodes=num_nodes)
    shortest_path_result = dgl.shortest_dist(g)
    # Unreachable pairs come back as -1; remap them to the large sentinel 510
    # (the model clamps distances to upto_hop + 1 in its input block anyway).
    shortest_path_result = torch.where(shortest_path_result == -1, 510, shortest_path_result)
    svd_pe = dgl.svd_pe(g, k=8, padding=True, random_flip=True)

    # combine
    item["input_nodes"] = input_nodes
    item["attn_edge_type"] = attn_edge_type
    item["spatial_pos"] = shortest_path_result
    item["svd_pe"] = svd_pe
    if "labels" not in item:
        item["labels"] = item["y"]

    return item
|
| 59 |
+
|
| 60 |
+
class EGTDataCollator:
    """Collate preprocessed graph dicts into padded, batched tensors for EGT.

    Every graph in the batch is padded to the largest node count; "attn_mask"
    marks the real (non-padding) nodes with 1.

    Args:
        on_the_fly_processing: If True, run `preprocess_item` on each feature
            dict at collation time instead of expecting preprocessed inputs.
    """

    def __init__(self, on_the_fly_processing=False):
        self.on_the_fly_processing = on_the_fly_processing

    def __call__(self, features: List[dict]) -> Dict[str, Any]:
        if self.on_the_fly_processing:
            features = [preprocess_item(i) for i in features]

        if not isinstance(features[0], Mapping):
            features = [vars(f) for f in features]
        batch = {}

        # Pad everything to the largest graph in the batch.
        max_node_num = max(len(i["input_nodes"]) for i in features)
        node_feat_size = len(features[0]["input_nodes"][0])
        edge_feat_size = len(features[0]["attn_edge_type"][0][0])
        svd_pe_size = len(features[0]["svd_pe"][0]) // 2
        batch_size = len(features)

        batch["featm"] = torch.zeros(batch_size, max_node_num, max_node_num, edge_feat_size, dtype=torch.long)
        batch["dm"] = torch.zeros(batch_size, max_node_num, max_node_num, dtype=torch.long)
        batch["node_feat"] = torch.zeros(batch_size, max_node_num, node_feat_size, dtype=torch.long)
        batch["svd_pe"] = torch.zeros(batch_size, max_node_num, svd_pe_size * 2, dtype=torch.float)
        batch["attn_mask"] = torch.zeros(batch_size, max_node_num, dtype=torch.long)

        for ix, f in enumerate(features):
            for k in ["attn_edge_type", "spatial_pos", "input_nodes", "svd_pe"]:
                f[k] = torch.tensor(f[k])

            # Copy each graph into the top-left corner of its padded slot.
            batch["featm"][ix, : f["attn_edge_type"].shape[0], : f["attn_edge_type"].shape[1], :] = f["attn_edge_type"]
            batch["dm"][ix, : f["spatial_pos"].shape[0], : f["spatial_pos"].shape[1]] = f["spatial_pos"]
            batch["node_feat"][ix, : f["input_nodes"].shape[0], :] = f["input_nodes"]
            batch["svd_pe"][ix, : f["svd_pe"].shape[0], :] = f["svd_pe"]
            batch["attn_mask"][ix, : f["svd_pe"].shape[0]] = 1

        sample = features[0]["labels"]
        if len(sample) == 1:
            # Single task (regression or binary classification): one label per
            # graph, concatenated into a flat (batch,) tensor. The original two
            # branches here were identical, so they are merged.
            batch["labels"] = torch.from_numpy(np.concatenate([i["labels"] for i in features]))
        else:  # multi task classification, left to float to keep the NaNs
            # BUG FIX: np.stack takes `axis`, not `dim` (`dim=0` raised TypeError).
            batch["labels"] = torch.from_numpy(np.stack([i["labels"] for i in features], axis=0))

        return batch
egt_model/configuration_egt.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
|
| 3 |
+
from transformers.configuration_utils import PretrainedConfig
|
| 4 |
+
from transformers.utils import logging
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
logger = logging.get_logger(__name__)

# Maps checkpoint shorthand names to hosted config URLs.
# NOTE(review): the key/URL still point at a Graphormer checkpoint — this looks
# like a copied placeholder; confirm there is a real EGT config to reference.
EGT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    # pcqm4mv1 now deprecated
    "graphormer-base": "https://huggingface.co/clefourrier/graphormer-base-pcqm4mv2/resolve/main/config.json",
    # See all Graphormer models at https://huggingface.co/models?filter=graphormer
}
+
|
| 15 |
+
|
| 16 |
+
class EGTConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`~EGTModel`]. It is used to instantiate an
    EGT model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the EGT
    [graphormer-base-pcqm4mv1](https://huggingface.co/graphormer-base-pcqm4mv1) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        feat_size (`int`, *optional*, defaults to 768):
            Node feature size.
        edge_feat_size (`int`, *optional*, defaults to 64):
            Edge feature size.
        num_heads (`int`, *optional*, defaults to 32):
            Number of attention heads, by which :attr: `feat_size` is divisible.
        num_layers (`int`, *optional*, defaults to 30):
            Number of layers.
        dropout (`float`, *optional*, defaults to 0.0):
            Dropout probability.
        attn_dropout (`float`, *optional*, defaults to 0.3):
            Attention dropout probability.
        activation (`str`, *optional*, defaults to 'ELU'):
            Activation function.
        egt_simple (`bool`, *optional*, defaults to False):
            If `False`, update the edge embedding.
        upto_hop (`int`, *optional*, defaults to 16):
            Maximum distance between nodes in the distance matrices.
        mlp_ratios (`List[float]`, *optional*, defaults to [1., 1.]):
            Ratios of inner dimensions with respect to the input dimension in MLP output block.
        num_virtual_nodes (`int`, *optional*, defaults to 4):
            Number of virtual nodes in EGT model, aggregated to graph embedding in the readout function.
        svd_pe_size (`int`, *optional*, defaults to 8):
            SVD positional encoding size.
        num_classes (`int`, *optional*, defaults to 1):
            Number of target classes or labels, set to n for binary classification of n tasks.

    Example:
    ```python
    >>> from transformers import EGTForGraphClassification, EGTConfig

    >>> # Initializing a default EGT configuration
    >>> configuration = EGTConfig()

    >>> # Initializing a model from that configuration
    >>> model = EGTForGraphClassification(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """

    model_type = "egt"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        feat_size: int = 768,
        edge_feat_size: int = 64,
        num_heads: int = 32,
        num_layers: int = 30,
        dropout: float = 0.0,
        attn_dropout: float = 0.3,
        activation: str = "ELU",
        egt_simple: bool = False,
        upto_hop: int = 16,
        # BUG FIX: the default used to be the mutable list [1.0, 1.0], which is
        # shared across all calls; an immutable tuple avoids aliasing while the
        # stored attribute remains a list as before.
        mlp_ratios: List[float] = (1.0, 1.0),
        num_virtual_nodes: int = 4,
        svd_pe_size: int = 8,
        num_classes: int = 1,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        **kwargs,
    ):
        self.feat_size = feat_size
        self.edge_feat_size = edge_feat_size
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.dropout = dropout
        self.attn_dropout = attn_dropout
        self.activation = activation
        self.egt_simple = egt_simple
        self.upto_hop = upto_hop
        # Always store a fresh list so callers' sequences are never aliased.
        self.mlp_ratios = list(mlp_ratios)
        self.num_virtual_nodes = num_virtual_nodes
        self.svd_pe_size = svd_pe_size
        self.num_classes = num_classes

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )
egt_model/modeling_egt.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" PyTorch EGT model."""
|
| 2 |
+
|
| 3 |
+
from typing import Optional, Tuple, Union
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import torch.nn.functional as F
|
| 8 |
+
from dgl.nn import EGTLayer
|
| 9 |
+
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
|
| 10 |
+
|
| 11 |
+
from transformers.modeling_outputs import (
|
| 12 |
+
BaseModelOutputWithNoAttention,
|
| 13 |
+
SequenceClassifierOutput,
|
| 14 |
+
)
|
| 15 |
+
from transformers.modeling_utils import PreTrainedModel
|
| 16 |
+
from .configuration_egt import EGTConfig
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Embedding-table layout constants. The offsets must match those used by
# collating_egt.convert_to_single_node_emb / convert_to_single_edge_emb
# (128 per node-feature column, 8 per edge-feature column).
# NOTE(review): the feature counts (9 node, 3 edge) look like the OGB molecular
# feature schema — confirm against the datasets actually used.
NODE_FEATURES_OFFSET = 128
NUM_NODE_FEATURES = 9
EDGE_FEATURES_OFFSET = 8
NUM_EDGE_FEATURES = 3
+
|
| 24 |
+
|
| 25 |
+
class VirtualNodes(nn.Module):
    """
    Prepend learnable virtual-node embeddings to the node features, pad the
    edge-feature matrix with matching learnable edge embeddings, and extend the
    additive attention mask so virtual nodes attend everywhere.
    """

    def __init__(self, feat_size, edge_feat_size, num_virtual_nodes=1):
        super().__init__()
        self.feat_size = feat_size
        self.edge_feat_size = edge_feat_size
        self.num_virtual_nodes = num_virtual_nodes

        self.vn_node_embeddings = nn.Parameter(torch.empty(num_virtual_nodes, self.feat_size))
        self.vn_edge_embeddings = nn.Parameter(torch.empty(num_virtual_nodes, self.edge_feat_size))
        nn.init.normal_(self.vn_node_embeddings)
        nn.init.normal_(self.vn_edge_embeddings)

    def forward(self, h, e, mask):
        batch_size, num_real = e.shape[0], e.shape[1]
        num_vn = self.num_virtual_nodes

        # Prepend one embedding row per virtual node: (b, N, F) -> (b, V+N, F).
        vn_rows = self.vn_node_embeddings.unsqueeze(0).expand(batch_size, -1, -1)
        h = torch.cat([vn_rows, h], dim=1)

        # Edge embeddings for (virtual, real), (real, virtual) and the
        # (virtual, virtual) corner, which averages the row/column embeddings.
        row_emb = self.vn_edge_embeddings.unsqueeze(1)   # (V, 1, E)
        col_emb = self.vn_edge_embeddings.unsqueeze(0)   # (1, V, E)
        corner_emb = 0.5 * (row_emb + col_emb)           # (V, V, E)

        row_emb = row_emb.unsqueeze(0).expand(batch_size, -1, e.shape[2], -1)
        col_emb = col_emb.unsqueeze(0).expand(batch_size, num_real, -1, -1)
        corner_emb = corner_emb.unsqueeze(0).expand(batch_size, -1, -1, -1)

        # Assemble (b, V+N, V+N, E): corner/col on the left, row/e on the right.
        right_part = torch.cat([row_emb, e], dim=1)
        left_part = torch.cat([corner_emb, col_emb], dim=1)
        e = torch.cat([left_part, right_part], dim=2)

        if mask is not None:
            # New rows/columns get 0 (unmasked): virtual nodes see everything.
            mask = F.pad(mask, (num_vn, 0, num_vn, 0), mode="constant", value=0)
        return h, e, mask
+
|
| 63 |
+
|
| 64 |
+
class EGTPreTrainedModel(PreTrainedModel):
    """
    A simple interface for downloading and loading pretrained models.
    """

    config_class = EGTConfig
    base_model_prefix = "egt"
    supports_gradient_checkpointing = True
    # NOTE(review): transformers expects a single `main_input_name` attribute;
    # these two custom names are not read by the library itself — confirm intent.
    main_input_name_nodes = "node_feat"
    main_input_name_edges = "featm"

    def _set_gradient_checkpointing(self, module, value=False):
        # Toggle the flag only on the inner encoder; other submodules are left alone.
        if isinstance(module, EGTModel):
            module.gradient_checkpointing = value
+
|
| 79 |
+
|
| 80 |
+
class EGTModel(EGTPreTrainedModel):
    """The EGT model is a graph-encoder model.

    It goes from a graph to its representation. If you want to use the model for a downstream classification task, use
    EGTForGraphClassification instead. For any other downstream task, feel free to add a new class, or combine
    this model with a downstream model of your choice, following the example in EGTForGraphClassification.
    """

    def __init__(self, config: EGTConfig):
        super().__init__(config)

        # Activation is looked up by name on torch.nn (e.g. "ELU" -> nn.ELU()).
        self.activation = getattr(nn, config.activation)()

        # Keyword arguments shared by every dgl.nn.EGTLayer built below.
        self.layer_common_kwargs = {
            "feat_size": config.feat_size,
            "edge_feat_size": config.edge_feat_size,
            "num_heads": config.num_heads,
            "num_virtual_nodes": config.num_virtual_nodes,
            "dropout": config.dropout,
            "attn_dropout": config.attn_dropout,
            "activation": self.activation,
        }
        self.edge_update = not config.egt_simple

        # All layers except the last optionally update the edge embeddings.
        self.EGT_layers = nn.ModuleList(
            [EGTLayer(**self.layer_common_kwargs, edge_update=self.edge_update) for _ in range(config.num_layers - 1)]
        )

        # The final layer never updates edges (its edge output would be unused).
        self.EGT_layers.append(EGTLayer(**self.layer_common_kwargs, edge_update=False))

        self.upto_hop = config.upto_hop
        self.num_virtual_nodes = config.num_virtual_nodes
        self.svd_pe_size = config.svd_pe_size

        # +1 and padding_idx=0 reserve index 0 for padding in both tables.
        self.nodef_embed = nn.Embedding(NUM_NODE_FEATURES * NODE_FEATURES_OFFSET + 1, config.feat_size, padding_idx=0)
        if self.svd_pe_size:
            # svd_pe has 2 * k channels per node (left/right singular vectors).
            self.svd_embed = nn.Linear(self.svd_pe_size * 2, config.feat_size)

        # Distance buckets 0..upto_hop plus one extra bucket for "farther".
        self.dist_embed = nn.Embedding(self.upto_hop + 2, config.edge_feat_size)
        self.featm_embed = nn.Embedding(
            NUM_EDGE_FEATURES * EDGE_FEATURES_OFFSET + 1, config.edge_feat_size, padding_idx=0
        )

        if self.num_virtual_nodes > 0:
            self.vn_layer = VirtualNodes(config.feat_size, config.edge_feat_size, self.num_virtual_nodes)

        self.final_ln_h = nn.LayerNorm(config.feat_size)
        # Output MLP widths: [readout width] + hidden ratios + [num_classes].
        mlp_dims = (
            [config.feat_size * max(self.num_virtual_nodes, 1)]
            + [round(config.feat_size * r) for r in config.mlp_ratios]
            + [config.num_classes]
        )
        self.mlp_layers = nn.ModuleList([nn.Linear(mlp_dims[i], mlp_dims[i + 1]) for i in range(len(mlp_dims) - 1)])
        self.mlp_fn = self.activation

        self._backward_compatibility_gradient_checkpointing()

    def input_block(self, nodef, featm, dm, nodem, svd_pe):
        """Embed raw node/edge inputs into (h, e, mask) for the EGT layers."""
        # Distances beyond upto_hop collapse into the single bucket upto_hop + 1.
        dm = dm.long().clamp(min=0, max=self.upto_hop + 1)  # (b,i,j)

        # Sum the per-column feature embeddings into one vector per node.
        h = self.nodef_embed(nodef).sum(dim=2)  # (b,i,w,h) -> (b,i,h)

        if self.svd_pe_size:
            h = h + self.svd_embed(svd_pe)

        e = self.dist_embed(dm) + self.featm_embed(featm).sum(dim=3)  # (b,i,j,f,e) -> (b,i,j,e)

        # Additive attention mask: 0 where both nodes are real, -1e9 otherwise.
        mask = (nodem[:, :, None] * nodem[:, None, :] - 1) * 1e9

        if self.num_virtual_nodes > 0:
            h, e, mask = self.vn_layer(h, e, mask)
        return h, e, mask

    def final_embedding(self, h, attn_mask):
        """Readout: concatenate virtual-node states, or masked-mean-pool real nodes."""
        h = self.final_ln_h(h)
        if self.num_virtual_nodes > 0:
            # Virtual nodes sit at the front of h; flatten them into one vector.
            h = h[:, : self.num_virtual_nodes].reshape(h.shape[0], -1)
        else:
            nodem = attn_mask.float().unsqueeze(dim=-1)
            # Masked mean; the 1e-9 guards against an all-padding graph.
            h = (h * nodem).sum(dim=1) / (nodem.sum(dim=1) + 1e-9)
        return h

    def output_block(self, h):
        """Apply the output MLP: activation between layers, none after the last."""
        h = self.mlp_layers[0](h)
        for layer in self.mlp_layers[1:]:
            h = layer(self.mlp_fn(h))
        return h

    def forward(
        self,
        node_feat: torch.LongTensor,
        featm: torch.LongTensor,
        dm: torch.LongTensor,
        attn_mask: torch.LongTensor,
        svd_pe: torch.Tensor,
        return_dict: Optional[bool] = None,
        **unused,
    ) -> torch.Tensor:
        """Run the EGT encoder and output MLP.

        Args (shapes as produced by EGTDataCollator):
            node_feat: (batch, nodes, node_features) integer node features.
            featm: (batch, nodes, nodes, edge_features) integer edge features.
            dm: (batch, nodes, nodes) shortest-path distance matrix.
            attn_mask: (batch, nodes); 1 for real nodes, 0 for padding.
            svd_pe: (batch, nodes, 2 * svd_pe_size) SVD positional encodings.
            return_dict: Whether to wrap the result in a model-output dataclass.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        h, e, mask = self.input_block(node_feat, featm, dm, attn_mask, svd_pe)

        for layer in self.EGT_layers[:-1]:
            if self.edge_update:
                h, e = layer(h, e, mask)
            else:
                h = layer(h, e, mask)

        # The last layer was built with edge_update=False, so it returns only h.
        h = self.EGT_layers[-1](h, e, mask)

        h = self.final_embedding(h, attn_mask)

        outputs = self.output_block(h)

        if not return_dict:
            return tuple(x for x in [outputs] if x is not None)
        return BaseModelOutputWithNoAttention(last_hidden_state=outputs)
+
|
| 198 |
+
|
| 199 |
+
class EGTForGraphClassification(EGTPreTrainedModel):
    """
    This model can be used for graph-level classification or regression tasks.

    It can be trained on
    - regression (by setting config.num_classes to 1); there should be one float-type label per graph
    - one task classification (by setting config.num_classes to the number of classes); there should be one integer
      label per graph
    - binary multi-task classification (by setting config.num_classes to the number of labels); there should be a list
      of integer labels for each graph.
    """

    def __init__(self, config: EGTConfig):
        super().__init__(config)
        # The bare encoder produces the logits; this wrapper only adds the loss.
        self.model = EGTModel(config)
        self.num_classes = config.num_classes

        self._backward_compatibility_gradient_checkpointing()

    def forward(
        self,
        node_feat: torch.LongTensor,
        featm: torch.LongTensor,
        dm: torch.LongTensor,
        attn_mask: torch.LongTensor,
        svd_pe: torch.Tensor,
        labels: Optional[torch.LongTensor] = None,
        return_dict: Optional[bool] = None,
        **unused,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        """Compute logits and, when labels are given, the task-appropriate loss."""
        if return_dict is None:
            return_dict = self.config.use_return_dict

        encoder_output = self.model(
            node_feat,
            featm,
            dm,
            attn_mask,
            svd_pe,
            return_dict=True,
        )
        logits = encoder_output["last_hidden_state"]

        loss = None
        if labels is not None:
            # NaN labels mark missing tasks; they are excluded from the loss.
            not_nan = ~torch.isnan(labels)

            if self.num_classes == 1:  # regression
                loss = MSELoss()(logits[not_nan].squeeze(), labels[not_nan].squeeze().float())
            elif self.num_classes > 1 and len(labels.shape) == 1:  # One task classification
                loss = CrossEntropyLoss()(logits[not_nan].view(-1, self.num_classes), labels[not_nan].view(-1))
            else:  # Binary multi-task classification
                loss = BCEWithLogitsLoss(reduction="sum")(logits[not_nan], labels[not_nan])

        if return_dict:
            return SequenceClassifierOutput(loss=loss, logits=logits, attentions=None)
        return tuple(x for x in [loss, logits] if x is not None)
share_model.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
from egt_model.configuration_egt import EGTConfig
from egt_model.modeling_egt import EGTModel, EGTForGraphClassification

# Register the custom classes so they can be auto-loaded from the Hub
# (via trust_remote_code) once pushed.
EGTConfig.register_for_auto_class()
EGTModel.register_for_auto_class("AutoModel")
# NOTE(review): "AutoModelForGraphClassification" is not an auto class shipped
# by transformers — confirm the intended auto-class name before publishing.
EGTForGraphClassification.register_for_auto_class("AutoModelForGraphClassification")

# Build a fresh model with default hyperparameters.
egt_config = EGTConfig()
egt = EGTForGraphClassification(egt_config)

# Hard-coded local checkpoint path; torch.load here unpickles a full model
# object (not just a state dict), so only run this on a checkpoint you trust.
pretrained_model = torch.load("/home/ubuntu/transformers/egt_model_state")
egt.model.load_state_dict(pretrained_model.state_dict())

# egt.push_to_hub("Zhiteng/egt")
|