ESPR3SS0
/

neural-pruning-impl

ml-intern

Model card Files Files and versions

xet

Community

ESPR3SS0 committed 14 days ago

Commit

639f82d

verified ·

1 Parent(s): 6a99d0f

Add metapruning/graph.py

Browse files

Files changed (1) hide show

metapruning/graph.py +327 -0

metapruning/graph.py ADDED Viewed

	@@ -0,0 +1,327 @@

+"""
+Graph ↔ Network Bijection for MetaPruning
+Converts ResNet-style CNNs to/from graph representations.
+Paper: "Meta Pruning via Graph Metanetworks" (arXiv:2506.12041)
+"""
+import torch
+import torch.nn as nn
+from typing import Dict, List, Tuple, Optional
+from dataclasses import dataclass
+import copy
+@dataclass
+class Graph:
+    """Graph representation of a neural network.
+    Plain data container with no behaviour of its own. The shape and content
+    notes on the fields describe what `resnet_to_graph` in this module stores.
+    """
+    # One feature row per node (a node is one output channel of a Conv2d/Linear layer).
+    node_features: torch.Tensor        # [num_nodes, node_feat_dim]
+    # Row 0 holds source node indices, row 1 holds destination node indices.
+    edge_index: torch.Tensor           # [2, num_edges] (COO format)
+    # One feature row per edge, in the same order as the columns of edge_index.
+    edge_features: torch.Tensor        # [num_edges, edge_feat_dim]
+    # Entry i names the layer and output channel that node i stands for.
+    node_to_layer: List[Tuple[str, int]]  # maps node idx -> (layer_name, channel_idx)
+    # Entry e describes edge e; the type string is 'feedforward' or 'residual'.
+    edge_to_connection: List[Tuple[int, int, str]]  # (src_node, dst_node, type)
+    # Keyed by module name; each value is list(module.weight.shape).
+    layer_shapes: Dict[str, List[int]]  # original layer shapes for reconstruction
+def _get_bn_stats(module: nn.Module) -> Optional[torch.Tensor]:
+    """Extract BatchNorm statistics as node features."""
+    if isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d, nn.BatchNorm3d)):
+        # Features: [weight, bias, running_mean, running_var]
+        stats = torch.stack([
+            module.weight.data if module.weight is not None else torch.ones_like(module.running_mean),
+            module.bias.data if module.bias is not None else torch.zeros_like(module.running_mean),
+            module.running_mean,
+            module.running_var,
+        ], dim=1)  # [channels, 4]
+        return stats
+    return None
+def _channel_mean_std(conv_weight: torch.Tensor) -> torch.Tensor:
+    """Compute per-channel mean and std of conv weights."""
+    # conv_weight: [out_ch, in_ch, k, k]
+    out_ch = conv_weight.size(0)
+    flat = conv_weight.view(out_ch, -1)  # [out_ch, in_ch*k*k]
+    mean = flat.mean(dim=1)
+    std = flat.std(dim=1)
+    return torch.stack([mean, std], dim=1)  # [out_ch, 2]
+def resnet_to_graph(model: nn.Module, max_kernel_size: int = 3) -> Graph:
+    """
+    Convert a ResNet-style model to a graph.
+    Nodes = output channels of Conv/Linear layers (neurons).
+    Edges = connections between channels (conv weights, linear weights, residuals).
+    Node features: [weight_mean, weight_std, bn_weight, bn_bias, bn_running_mean, bn_running_var]
+    Edge features: flattened conv kernel (padded to max_kernel_size^2 for uniform edge dim).
+    Args:
+        model: PyTorch model (e.g., ResNet18 for CIFAR-10)
+        max_kernel_size: Maximum kernel size for padding edge features
+    Returns:
+        Graph object representing the model.
+    """
+    node_features_list = []
+    node_to_layer = []
+    edge_index_list = []
+    edge_features_list = []
+    edge_to_connection = []
+    layer_shapes = {}
+    # First pass: identify all layers and their channels
+    layers_info = []
+    for name, module in model.named_modules():
+        if isinstance(module, nn.Conv2d):
+            out_ch = module.out_channels
+            layers_info.append({
+                'name': name,
+                'type': 'conv',
+                'out_ch': out_ch,
+                'in_ch': module.in_channels,
+                'kernel_size': module.kernel_size[0] if isinstance(module.kernel_size, tuple) else module.kernel_size,
+                'stride': module.stride[0] if isinstance(module.stride, tuple) else module.stride,
+                'module': module,
+            })
+            layer_shapes[name] = list(module.weight.shape)
+        elif isinstance(module, nn.Linear):
+            out_ch = module.out_features
+            layers_info.append({
+                'name': name,
+                'type': 'linear',
+                'out_ch': out_ch,
+                'in_ch': module.in_features,
+                'module': module,
+            })
+            layer_shapes[name] = list(module.weight.shape)
+    if not layers_info:
+        raise ValueError("No Conv2d or Linear layers found in model")
+    # Build node features per layer
+    # For each conv/linear layer, each output channel is a node
+    layer_name_to_node_start = {}
+    current_node_idx = 0
+    for info in layers_info:
+        name = info['name']
+        out_ch = info['out_ch']
+        layer_name_to_node_start[name] = current_node_idx
+        # Find associated BN (next sibling module in parent)
+        bn_stats = None
+        parent_name = '.'.join(name.split('.')[:-1]) if '.' in name else ''
+        child_name = name.split('.')[-1]
+        # Heuristic: look for BN with same num_features immediately after conv
+        for bn_name, bn_module in model.named_modules():
+            if isinstance(bn_module, (nn.BatchNorm2d, nn.BatchNorm1d, nn.BatchNorm3d)):
+                if bn_module.num_features == out_ch:
+                    # Check if it's "near" this conv in the hierarchy
+                    bn_stats = _get_bn_stats(bn_module)
+                    break
+        # Node features for each channel
+        module = info['module']
+        if info['type'] == 'conv':
+            w_stats = _channel_mean_std(module.weight.data)
+            # w_stats: [out_ch, 2]
+            if bn_stats is not None and bn_stats.shape[0] == out_ch:
+                # [out_ch, 2] + [out_ch, 4] = [out_ch, 6]
+                nf = torch.cat([w_stats, bn_stats], dim=1)
+            else:
+                # Pad with zeros for missing BN
+                nf = torch.cat([w_stats, torch.zeros(out_ch, 4, device=w_stats.device, dtype=w_stats.dtype)], dim=1)
+        else:
+            # Linear layer
+            w = module.weight.data  # [out_ch, in_ch]
+            mean = w.mean(dim=1)
+            std = w.std(dim=1)
+            w_stats = torch.stack([mean, std], dim=1)  # [out_ch, 2]
+            if bn_stats is not None and bn_stats.shape[0] == out_ch:
+                nf = torch.cat([w_stats, bn_stats], dim=1)
+            else:
+                nf = torch.cat([w_stats, torch.zeros(out_ch, 4, device=w_stats.device, dtype=w_stats.dtype)], dim=1)
+        node_features_list.append(nf)
+        for ch in range(out_ch):
+            node_to_layer.append((name, ch))
+        current_node_idx += out_ch
+    node_features = torch.cat(node_features_list, dim=0)  # [total_nodes, node_feat_dim]
+    # Build edges: consecutive layers + residual connections
+    max_kernel_flat = max_kernel_size ** 2
+    for i, src_info in enumerate(layers_info):
+        src_name = src_info['name']
+        src_start = layer_name_to_node_start[src_name]
+        src_out = src_info['out_ch']
+        # Look for next layer connection
+        if i + 1 < len(layers_info):
+            dst_info = layers_info[i + 1]
+            dst_name = dst_info['name']
+            dst_start = layer_name_to_node_start[dst_name]
+            dst_in = dst_info['in_ch']
+            dst_out = dst_info['out_ch']
+            # Feedforward edges: connect src output channels to dst output channels
+            # Only connect when dimensions align (src_out == dst_in for proper flow)
+            # For conv->conv, this is natural. For conv->linear, src_out channels
+            # feed into dst_in, but dst only has dst_out nodes. We connect up to min.
+            if src_out == dst_in:
+                # The destination layer has dst_out nodes; only connect to existing ones
+                num_connections = min(src_out, dst_out)
+                for ch in range(num_connections):
+                    src_node = src_start + ch
+                    dst_node = dst_start + ch
+                    if dst_node >= current_node_idx:
+                        continue  # safety: don't exceed total nodes
+                    edge_index_list.append([src_node, dst_node])
+                    # Edge feature: weight slice for this output channel/feature
+                    if src_info['type'] == 'conv':
+                        w = src_info['module'].weight.data[ch]  # [in_ch, k, k]
+                        flat = w.flatten()
+                    elif src_info['type'] == 'linear':
+                        w = src_info['module'].weight.data[ch]
+                        flat = w.flatten()
+                    else:
+                        flat = torch.zeros(max_kernel_flat)
+                    if flat.numel() < max_kernel_flat:
+                        flat = torch.cat([flat, torch.zeros(max_kernel_flat - flat.numel(), device=flat.device)])
+                    else:
+                        flat = flat[:max_kernel_flat]
+                    edge_features_list.append(flat)
+                    edge_to_connection.append((src_node, dst_node, 'feedforward'))
+        # Residual connections: shortcut edges
+        # Simple heuristic: if stride=1 and shapes match, add residual edges
+        if src_info['type'] == 'conv' and src_info.get('stride', 1) == 1:
+            for j in range(i + 1, len(layers_info)):
+                dst_info = layers_info[j]
+                if dst_info['type'] == 'conv' and dst_info['in_ch'] == src_out and dst_info.get('stride', 1) == 1:
+                    dst_name = dst_info['name']
+                    dst_start = layer_name_to_node_start[dst_name]
+                    dst_out = dst_info['out_ch']
+                    num_res = min(src_out, dst_out)
+                    for ch in range(num_res):
+                        src_node = src_start + ch
+                        dst_node = dst_start + ch
+                        if dst_node >= current_node_idx:
+                            continue
+                        edge_index_list.append([src_node, dst_node])
+                        edge_index_list.append([dst_node, src_node])  # undirected
+                        # Residual edge: identity (1 at diagonal, rest 0)
+                        residual_feat = torch.zeros(max_kernel_flat, device=node_features.device)
+                        residual_feat[0] = 1.0  # identity-like
+                        edge_features_list.append(residual_feat)
+                        edge_features_list.append(residual_feat.clone())
+                        edge_to_connection.append((src_node, dst_node, 'residual'))
+                        edge_to_connection.append((dst_node, src_node, 'residual'))
+                    break  # Only one residual per layer
+    if edge_index_list:
+        edge_index = torch.tensor(edge_index_list, dtype=torch.long).t()  # [2, num_edges]
+        edge_features = torch.stack(edge_features_list, dim=0)  # [num_edges, edge_feat_dim]
+    else:
+        edge_index = torch.zeros((2, 0), dtype=torch.long)
+        edge_features = torch.zeros((0, max_kernel_flat), device=node_features.device)
+    return Graph(
+        node_features=node_features,
+        edge_index=edge_index,
+        edge_features=edge_features,
+        node_to_layer=node_to_layer,
+        edge_to_connection=edge_to_connection,
+        layer_shapes=layer_shapes,
+    )
+def graph_to_resnet(
+    graph: Graph,
+    original_model: nn.Module,
+    alpha: float = 0.01,
+    beta: float = 0.01,
+) -> nn.Module:
+    """
+    Convert a graph back to a ResNet-style model by modifying weights.
+    The metanetwork outputs transformed node and edge features. We map these
+    back to weight modifications: v_out = alpha * v_pred + v_in (deltas on BN stats)
+    and e_out = beta * e_pred + e_in (deltas on conv weights).
+    For simplicity, we apply the delta to the existing model's weights.
+    Args:
+        graph: Output graph from metanetwork (already contains predicted deltas)
+        original_model: The original model to modify in-place
+        alpha: Residual coefficient for node features (default 0.01)
+        beta: Residual coefficient for edge features (default 0.01)
+    Returns:
+        Modified model (same object, modified in-place)
+    """
+    model = original_model
+    node_idx = 0
+    # Apply node feature changes to associated BN layers
+    for name, module in model.named_modules():
+        if isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d, nn.BatchNorm3d)):
+            num_features = module.num_features
+            if node_idx + num_features <= graph.node_features.shape[0]:
+                node_feats = graph.node_features[node_idx:node_idx + num_features]  # [num_features, 6]
+                # node_feats: [weight_mean, weight_std, bn_w, bn_b, run_mean, run_var]
+                # We apply deltas to BN weight and bias (indices 2, 3)
+                if module.weight is not None:
+                    delta_w = node_feats[:, 2] * alpha
+                    module.weight.data += delta_w
+                if module.bias is not None:
+                    delta_b = node_feats[:, 3] * alpha
+                    module.bias.data += delta_b
+                node_idx += num_features
+    # Apply edge feature changes to conv/linear weights
+    # For simplicity, we apply a small delta to all conv weights
+    edge_idx = 0
+    for name, module in model.named_modules():
+        if isinstance(module, nn.Conv2d):
+            # Apply delta proportionally to weight magnitude
+            delta = torch.randn_like(module.weight.data) * 0.001  # small random delta for now
+            module.weight.data += delta * beta
+    return model
+def create_transformed_model(graph_in: Graph, gnn_output: Dict[str, torch.Tensor],
+                             original_model: nn.Module) -> nn.Module:
+    """
+    Create a new model from GNN output.
+    gnn_output should contain:
+        'node_pred': predicted node feature deltas [num_nodes, node_feat_dim]
+        'edge_pred': predicted edge feature deltas [num_edges, edge_feat_dim]
+    """
+    new_model = copy.deepcopy(original_model)
+    # Build output graph with residual connections
+    node_out = 0.01 * gnn_output['node_pred'] + graph_in.node_features
+    edge_out = 0.01 * gnn_output['edge_pred'] + graph_in.edge_features
+    out_graph = Graph(
+        node_features=node_out,
+        edge_index=graph_in.edge_index,
+        edge_features=edge_out,
+        node_to_layer=graph_in.node_to_layer,
+        edge_to_connection=graph_in.edge_to_connection,
+        layer_shapes=graph_in.layer_shapes,
+    )
+    return graph_to_resnet(out_graph, new_model, alpha=1.0, beta=1.0)