Improves training efficiency and configuration
Increases the number of workers for the data loader to improve data loading speed.
Adjusts SLURM job configuration to allocate more CPUs per task, potentially speeding up computations.
Modifies the training script to save the initial model weights, and sets default training parameters like epochs, batch size, learning rate, and score weight for better reproducibility and control over the training process.
- fast_pointnet.py +1 -1
- hoho_gpu.batch +1 -1
- train_pnet_cluster.py +2 -2
fast_pointnet.py
CHANGED
|
@@ -274,7 +274,7 @@ def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, ba
|
|
| 274 |
|
| 275 |
return patch_data, targets, valid_masks, distances
|
| 276 |
|
| 277 |
-
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=
|
| 278 |
collate_fn=collate_fn, drop_last=True)
|
| 279 |
|
| 280 |
# Initialize model with score prediction
|
|
|
|
| 274 |
|
| 275 |
return patch_data, targets, valid_masks, distances
|
| 276 |
|
| 277 |
+
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
|
| 278 |
collate_fn=collate_fn, drop_last=True)
|
| 279 |
|
| 280 |
# Initialize model with score prediction
|
hoho_gpu.batch
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
#SBATCH --nodes=1 # 1 node
|
| 3 |
#SBATCH --ntasks-per-node=1 # 1 tasks per node
|
| 4 |
-
#SBATCH --cpus-per-task=
|
| 5 |
#SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
|
| 6 |
#SBATCH --time=24:00:00 # time limits: 1 hour
|
| 7 |
#SBATCH --error=hoho_gpu.err # standard error file
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
#SBATCH --nodes=1 # 1 node
|
| 3 |
#SBATCH --ntasks-per-node=1 # 1 tasks per node
|
| 4 |
+
#SBATCH --cpus-per-task=16 # 6 CPUS per task = 12 CPUS per node
|
| 5 |
#SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
|
| 6 |
#SBATCH --time=24:00:00 # time limits: 1 hour
|
| 7 |
#SBATCH --error=hoho_gpu.err # standard error file
|
train_pnet_cluster.py
CHANGED
|
@@ -4,7 +4,7 @@ if __name__ == "__main__":
|
|
| 4 |
|
| 5 |
# Load the dataset
|
| 6 |
dataset_path = "/mnt/personal/skvrnjan/hohocustom/"
|
| 7 |
-
model_save_path = "/mnt/personal/skvrnjan/hoho_pnet/"
|
| 8 |
|
| 9 |
# Train the model
|
| 10 |
-
train_pointnet(dataset_path, model_save_path)
|
|
|
|
| 4 |
|
| 5 |
# Load the dataset
|
| 6 |
dataset_path = "/mnt/personal/skvrnjan/hohocustom/"
|
| 7 |
+
model_save_path = "/mnt/personal/skvrnjan/hoho_pnet/initial.pth"
|
| 8 |
|
| 9 |
# Train the model
|
| 10 |
+
train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=128, learning_rate=0.001, score_weight=0.1)
|