jskvrna committed on
Commit
81255ac
·
1 Parent(s): 5d8d206

Improves PointNet classification model.

Browse files

This commit introduces a significantly enhanced PointNet
implementation for binary classification of 6D point cloud
patches.

Key improvements include:

- A deeper architecture with residual connections for enhanced
feature extraction.
- An attention mechanism for improved feature weighting.
- Multi-scale feature aggregation for richer representations.
- An enhanced classification head with residual connections and varying dropout rates for better generalization.

Additionally, the training batch size was reduced to 128, and the model save path was updated to reflect the new, improved model.

Files changed (2) hide show
  1. fast_pointnet_class.py +161 -40
  2. train_pnet_class_cluster.py +2 -2
fast_pointnet_class.py CHANGED
@@ -10,77 +10,199 @@ import json
10
 
11
  class ClassificationPointNet(nn.Module):
12
  """
13
- PointNet implementation for binary classification from 6D point cloud patches.
14
  Takes 6D point clouds (x,y,z,r,g,b) and predicts binary classification (edge/not edge).
 
15
  """
16
  def __init__(self, input_dim=6, max_points=1024):
17
  super(ClassificationPointNet, self).__init__()
18
  self.max_points = max_points
19
 
20
- # Point-wise MLPs for feature extraction (deeper network)
21
  self.conv1 = nn.Conv1d(input_dim, 64, 1)
22
- self.conv2 = nn.Conv1d(64, 128, 1)
23
- self.conv3 = nn.Conv1d(128, 256, 1)
24
- self.conv4 = nn.Conv1d(256, 512, 1)
25
- self.conv5 = nn.Conv1d(512, 1024, 1)
26
- self.conv6 = nn.Conv1d(1024, 2048, 1) # Additional layer
27
-
28
- # Classification head (deeper with more capacity)
29
- self.fc1 = nn.Linear(2048, 1024)
30
- self.fc2 = nn.Linear(1024, 512)
31
- self.fc3 = nn.Linear(512, 256)
32
- self.fc4 = nn.Linear(256, 128)
33
- self.fc5 = nn.Linear(128, 64)
34
- self.fc6 = nn.Linear(64, 1) # Single output for binary classification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  # Batch normalization layers
37
  self.bn1 = nn.BatchNorm1d(64)
38
- self.bn2 = nn.BatchNorm1d(128)
39
- self.bn3 = nn.BatchNorm1d(256)
40
- self.bn4 = nn.BatchNorm1d(512)
41
- self.bn5 = nn.BatchNorm1d(1024)
42
- self.bn6 = nn.BatchNorm1d(2048)
43
-
44
- # Dropout layers
45
- self.dropout1 = nn.Dropout(0.3)
46
- self.dropout2 = nn.Dropout(0.4)
47
- self.dropout3 = nn.Dropout(0.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  self.dropout4 = nn.Dropout(0.4)
49
- self.dropout5 = nn.Dropout(0.3)
 
 
 
50
 
51
  def forward(self, x):
52
  """
53
- Forward pass
54
  Args:
55
  x: (batch_size, input_dim, max_points) tensor
56
  Returns:
57
- classification: (batch_size, 1) tensor of logits (sigmoid for probability)
58
  """
59
  batch_size = x.size(0)
60
 
61
- # Point-wise feature extraction
62
  x1 = F.relu(self.bn1(self.conv1(x)))
63
  x2 = F.relu(self.bn2(self.conv2(x1)))
 
 
64
  x3 = F.relu(self.bn3(self.conv3(x2)))
65
  x4 = F.relu(self.bn4(self.conv4(x3)))
 
 
 
66
  x5 = F.relu(self.bn5(self.conv5(x4)))
67
  x6 = F.relu(self.bn6(self.conv6(x5)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- # Global max pooling
70
- global_features = torch.max(x6, 2)[0] # (batch_size, 2048)
71
 
72
- # Classification head
73
- x = F.relu(self.fc1(global_features))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  x = self.dropout1(x)
75
- x = F.relu(self.fc2(x))
 
 
76
  x = self.dropout2(x)
77
- x = F.relu(self.fc3(x))
 
78
  x = self.dropout3(x)
79
- x = F.relu(self.fc4(x))
 
 
 
 
80
  x = self.dropout4(x)
81
- x = F.relu(self.fc5(x))
 
82
  x = self.dropout5(x)
83
- classification = self.fc6(x) # (batch_size, 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  return classification
86
 
@@ -401,6 +523,5 @@ def predict_class_from_patch(model: ClassificationPointNet, patch: Dict, device:
401
  outputs = model(patch_tensor) # (1, 1)
402
  probability = torch.sigmoid(outputs).item()
403
  predicted_class = int(probability > 0.5)
404
- confidence = probability if predicted_class == 1 else (1 - probability)
405
 
406
- return predicted_class, confidence
 
10
 
11
  class ClassificationPointNet(nn.Module):
12
  """
13
+ Enhanced PointNet implementation for binary classification from 6D point cloud patches.
14
  Takes 6D point clouds (x,y,z,r,g,b) and predicts binary classification (edge/not edge).
15
+ Features: Residual connections, attention mechanism, multi-scale features, deeper architecture.
16
  """
17
  def __init__(self, input_dim=6, max_points=1024):
18
  super(ClassificationPointNet, self).__init__()
19
  self.max_points = max_points
20
 
21
+ # Point-wise MLPs with residual connections (much deeper)
22
  self.conv1 = nn.Conv1d(input_dim, 64, 1)
23
+ self.conv2 = nn.Conv1d(64, 64, 1)
24
+ self.conv3 = nn.Conv1d(64, 128, 1)
25
+ self.conv4 = nn.Conv1d(128, 128, 1)
26
+ self.conv5 = nn.Conv1d(128, 256, 1)
27
+ self.conv6 = nn.Conv1d(256, 256, 1)
28
+ self.conv7 = nn.Conv1d(256, 512, 1)
29
+ self.conv8 = nn.Conv1d(512, 512, 1)
30
+ self.conv9 = nn.Conv1d(512, 1024, 1)
31
+ self.conv10 = nn.Conv1d(1024, 1024, 1)
32
+ self.conv11 = nn.Conv1d(1024, 2048, 1)
33
+
34
+ # Residual connection layers
35
+ self.res_conv1 = nn.Conv1d(64, 128, 1)
36
+ self.res_conv2 = nn.Conv1d(128, 256, 1)
37
+ self.res_conv3 = nn.Conv1d(256, 512, 1)
38
+ self.res_conv4 = nn.Conv1d(512, 1024, 1)
39
+
40
+ # Self-attention mechanism
41
+ self.attention = nn.MultiheadAttention(embed_dim=2048, num_heads=8, batch_first=True)
42
+ self.attention_norm = nn.LayerNorm(2048)
43
+
44
+ # Multi-scale feature aggregation
45
+ self.scale_conv1 = nn.Conv1d(2048, 512, 1)
46
+ self.scale_conv2 = nn.Conv1d(2048, 512, 1)
47
+ self.scale_conv3 = nn.Conv1d(2048, 512, 1)
48
+
49
+ # Enhanced classification head with residual connections
50
+ self.fc1 = nn.Linear(4096, 2048) # Increased input due to multi-scale features
51
+ self.fc2 = nn.Linear(2048, 2048)
52
+ self.fc3 = nn.Linear(2048, 1024)
53
+ self.fc4 = nn.Linear(1024, 1024)
54
+ self.fc5 = nn.Linear(1024, 512)
55
+ self.fc6 = nn.Linear(512, 512)
56
+ self.fc7 = nn.Linear(512, 256)
57
+ self.fc8 = nn.Linear(256, 128)
58
+ self.fc9 = nn.Linear(128, 64)
59
+ self.fc10 = nn.Linear(64, 1)
60
+
61
+ # Residual connections for FC layers
62
+ self.fc_res1 = nn.Linear(2048, 1024)
63
+ self.fc_res2 = nn.Linear(1024, 512)
64
+ self.fc_res3 = nn.Linear(512, 256)
65
 
66
  # Batch normalization layers
67
  self.bn1 = nn.BatchNorm1d(64)
68
+ self.bn2 = nn.BatchNorm1d(64)
69
+ self.bn3 = nn.BatchNorm1d(128)
70
+ self.bn4 = nn.BatchNorm1d(128)
71
+ self.bn5 = nn.BatchNorm1d(256)
72
+ self.bn6 = nn.BatchNorm1d(256)
73
+ self.bn7 = nn.BatchNorm1d(512)
74
+ self.bn8 = nn.BatchNorm1d(512)
75
+ self.bn9 = nn.BatchNorm1d(1024)
76
+ self.bn10 = nn.BatchNorm1d(1024)
77
+ self.bn11 = nn.BatchNorm1d(2048)
78
+
79
+ # Scale batch norms
80
+ self.scale_bn1 = nn.BatchNorm1d(512)
81
+ self.scale_bn2 = nn.BatchNorm1d(512)
82
+ self.scale_bn3 = nn.BatchNorm1d(512)
83
+
84
+ # FC batch norms
85
+ self.fc_bn1 = nn.BatchNorm1d(2048)
86
+ self.fc_bn2 = nn.BatchNorm1d(2048)
87
+ self.fc_bn3 = nn.BatchNorm1d(1024)
88
+ self.fc_bn4 = nn.BatchNorm1d(1024)
89
+ self.fc_bn5 = nn.BatchNorm1d(512)
90
+ self.fc_bn6 = nn.BatchNorm1d(512)
91
+ self.fc_bn7 = nn.BatchNorm1d(256)
92
+ self.fc_bn8 = nn.BatchNorm1d(128)
93
+
94
+ # Dropout layers with varying rates
95
+ self.dropout1 = nn.Dropout(0.1)
96
+ self.dropout2 = nn.Dropout(0.2)
97
+ self.dropout3 = nn.Dropout(0.3)
98
  self.dropout4 = nn.Dropout(0.4)
99
+ self.dropout5 = nn.Dropout(0.5)
100
+ self.dropout6 = nn.Dropout(0.4)
101
+ self.dropout7 = nn.Dropout(0.3)
102
+ self.dropout8 = nn.Dropout(0.2)
103
 
104
  def forward(self, x):
105
  """
106
+ Forward pass with residual connections and attention
107
  Args:
108
  x: (batch_size, input_dim, max_points) tensor
109
  Returns:
110
+ classification: (batch_size, 1) tensor of logits
111
  """
112
  batch_size = x.size(0)
113
 
114
+ # Deep point-wise feature extraction with residual connections
115
  x1 = F.relu(self.bn1(self.conv1(x)))
116
  x2 = F.relu(self.bn2(self.conv2(x1)))
117
+ x2 = x2 + x1 # Residual connection
118
+
119
  x3 = F.relu(self.bn3(self.conv3(x2)))
120
  x4 = F.relu(self.bn4(self.conv4(x3)))
121
+ res1 = self.res_conv1(x2)
122
+ x4 = x4 + res1 # Residual connection
123
+
124
  x5 = F.relu(self.bn5(self.conv5(x4)))
125
  x6 = F.relu(self.bn6(self.conv6(x5)))
126
+ res2 = self.res_conv2(x4)
127
+ x6 = x6 + res2 # Residual connection
128
+
129
+ x7 = F.relu(self.bn7(self.conv7(x6)))
130
+ x8 = F.relu(self.bn8(self.conv8(x7)))
131
+ res3 = self.res_conv3(x6)
132
+ x8 = x8 + res3 # Residual connection
133
+
134
+ x9 = F.relu(self.bn9(self.conv9(x8)))
135
+ x10 = F.relu(self.bn10(self.conv10(x9)))
136
+ res4 = self.res_conv4(x8)
137
+ x10 = x10 + res4 # Residual connection
138
+
139
+ x11 = F.relu(self.bn11(self.conv11(x10)))
140
+
141
+ # Multi-scale global pooling
142
+ # Max pooling
143
+ global_max = torch.max(x11, 2)[0] # (batch_size, 2048)
144
 
145
+ # Average pooling
146
+ global_avg = torch.mean(x11, 2) # (batch_size, 2048)
147
 
148
+ # Attention-based pooling
149
+ x11_transposed = x11.transpose(1, 2) # (batch_size, max_points, 2048)
150
+ attended, _ = self.attention(x11_transposed, x11_transposed, x11_transposed)
151
+ attended = self.attention_norm(attended + x11_transposed)
152
+ global_att = torch.mean(attended, 1) # (batch_size, 2048)
153
+
154
+ # Multi-scale feature extraction
155
+ scale1 = F.relu(self.scale_bn1(self.scale_conv1(x11)))
156
+ scale1_pool = torch.max(scale1, 2)[0]
157
+
158
+ scale2 = F.relu(self.scale_bn2(self.scale_conv2(x11)))
159
+ scale2_pool = torch.mean(scale2, 2)
160
+
161
+ scale3 = F.relu(self.scale_bn3(self.scale_conv3(x11)))
162
+ scale3_pool = torch.std(scale3, 2)
163
+
164
+ # Concatenate all global features
165
+ global_features = torch.cat([
166
+ global_max, global_avg, global_att,
167
+ scale1_pool, scale2_pool, scale3_pool
168
+ ], dim=1) # (batch_size, 4096)
169
+
170
+ # Enhanced classification head with residual connections
171
+ x = F.relu(self.fc_bn1(self.fc1(global_features)))
172
  x = self.dropout1(x)
173
+
174
+ x = F.relu(self.fc_bn2(self.fc2(x)))
175
+ identity1 = x
176
  x = self.dropout2(x)
177
+
178
+ x = F.relu(self.fc_bn3(self.fc3(x)))
179
  x = self.dropout3(x)
180
+
181
+ x = F.relu(self.fc_bn4(self.fc4(x)))
182
+ res_fc1 = self.fc_res1(identity1)
183
+ x = x + res_fc1 # Residual connection
184
+ identity2 = x
185
  x = self.dropout4(x)
186
+
187
+ x = F.relu(self.fc_bn5(self.fc5(x)))
188
  x = self.dropout5(x)
189
+
190
+ x = F.relu(self.fc_bn6(self.fc6(x)))
191
+ res_fc2 = self.fc_res2(identity2)
192
+ x = x + res_fc2 # Residual connection
193
+ identity3 = x
194
+ x = self.dropout6(x)
195
+
196
+ x = F.relu(self.fc_bn7(self.fc7(x)))
197
+ x = self.dropout7(x)
198
+
199
+ x = F.relu(self.fc_bn8(self.fc8(x)))
200
+ res_fc3 = self.fc_res3(identity3)
201
+ x = x + res_fc3 # Residual connection
202
+ x = self.dropout8(x)
203
+
204
+ x = F.relu(self.fc9(x))
205
+ classification = self.fc10(x) # (batch_size, 1)
206
 
207
  return classification
208
 
 
523
  outputs = model(patch_tensor) # (1, 1)
524
  probability = torch.sigmoid(outputs).item()
525
  predicted_class = int(probability > 0.5)
 
526
 
527
+ return predicted_class, probability
train_pnet_class_cluster.py CHANGED
@@ -5,9 +5,9 @@ if __name__ == "__main__":
5
 
6
  # Load the dataset
7
  dataset_path = "/mnt/personal/skvrnjan/hohocustom_edges/"
8
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_edges_v2/initial.pth"
9
 
10
  os.makedirs(model_save_path, exist_ok=True)
11
 
12
  # Train the model
13
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=512, lr=0.001)
 
5
 
6
  # Load the dataset
7
  dataset_path = "/mnt/personal/skvrnjan/hohocustom_edges/"
8
+ model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_edges_stronger/initial.pth"
9
 
10
  os.makedirs(model_save_path, exist_ok=True)
11
 
12
  # Train the model
13
+ train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=128, lr=0.001)