| |
| |
| |
| |
| |
| |
|
|
| |
|
|
| """ " |
| This file is the entry point for launching experiments with Implicitron. |
| |
| Launch Training |
| --------------- |
| Experiment config .yaml files are located in the |
| `projects/implicitron_trainer/configs` folder. To launch an experiment, |
| specify the name of the file. Specific config values can also be overridden |
| from the command line, for example: |
| |
| ``` |
| ./experiment.py --config-name base_config.yaml override.param.one=42 override.param.two=84 |
| ``` |
| |
| Main functions |
| --------------- |
| - The Experiment class defines `run` which creates the model, optimizer, and other |
| objects used in training, then starts TrainingLoop's `run` function. |
| - TrainingLoop takes care of the actual training logic: forward and backward passes, |
| evaluation and testing, as well as model checkpointing, visualization, and metric |
| printing. |
| |
| Outputs |
| -------- |
| The outputs of the experiment are saved and logged in multiple ways: |
| - Checkpoints: |
| Model, optimizer and stats are stored in the directory |
| named by the `exp_dir` key from the config file / CLI parameters. |
| - Stats |
| Stats are logged and plotted to the file "train_stats.pdf" in the |
| same directory. The stats are also saved as part of the checkpoint file. |
| - Visualizations |
| Predictions are plotted to a visdom server running at the |
| port specified by the `visdom_server` and `visdom_port` keys in the |
| config file. |
| |
| """ |
|
|
| import logging |
| import os |
| import warnings |
|
|
| from dataclasses import field |
|
|
| import hydra |
|
|
| import torch |
| from accelerate import Accelerator |
| from omegaconf import DictConfig, OmegaConf |
| from packaging import version |
|
|
| from pytorch3d.implicitron.dataset.data_source import ( |
| DataSourceBase, |
| ImplicitronDataSource, |
| ) |
| from pytorch3d.implicitron.models.base_model import ImplicitronModelBase |
|
|
| from pytorch3d.implicitron.models.renderer.multipass_ea import ( |
| MultiPassEmissionAbsorptionRenderer, |
| ) |
| from pytorch3d.implicitron.models.renderer.ray_sampler import AdaptiveRaySampler |
| from pytorch3d.implicitron.tools.config import ( |
| Configurable, |
| expand_args_fields, |
| remove_unused_components, |
| run_auto_creation, |
| ) |
|
|
| from .impl.model_factory import ModelFactoryBase |
| from .impl.optimizer_factory import OptimizerFactoryBase |
| from .impl.training_loop import TrainingLoopBase |
| from .impl.utils import seed_all_random_engines |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
_RUN = hydra.types.RunMode.RUN

# Implicitron relies on hydra 1.1+ features (e.g. RunMode above);
# fail fast with a clear message rather than with an obscure attribute error.
if version.parse(hydra.__version__) < version.Version("1.1"):
    raise ValueError(
        f"Hydra version {hydra.__version__} is too old."
        " (Implicitron requires version 1.1 or later.)"
    )

try:
    # Optional internal module: importing it registers FAIR-cluster (SLURM)
    # integration when available; absent in public installs.
    import pytorch3d.implicitron.fair_cluster.slurm  # noqa: F401
except ModuleNotFoundError:
    pass

# Escape hatch: setting PYTORCH3D_NO_ACCELERATE (to any value) disables the
# HuggingFace Accelerate wrapper in Experiment.run.
no_accelerate = os.environ.get("PYTORCH3D_NO_ACCELERATE") is not None
|
|
|
|
class Experiment(Configurable):
    """
    This class is at the top level of Implicitron's config hierarchy. Its
    members are high-level components necessary for training an implicit
    rendering network.

    Members:
        data_source: An object that produces datasets and dataloaders.
        model_factory: An object that produces an implicit rendering model as
            well as its corresponding Stats object.
        optimizer_factory: An object that produces the optimizer and lr
            scheduler.
        training_loop: An object that runs training given the outputs produced
            by the data_source, model_factory and optimizer_factory.
        seed: A random seed to ensure reproducibility.
        detect_anomaly: Whether torch.autograd should detect anomalies. Useful
            for debugging, but might slow down the training.
        exp_dir: Root experimentation directory. Checkpoints and training stats
            will be saved here.
    """

    # Pluggable components: each `*_class_type` string selects the concrete
    # implementation that run_auto_creation instantiates for the field above it.
    data_source: DataSourceBase
    data_source_class_type: str = "ImplicitronDataSource"

    model_factory: ModelFactoryBase
    model_factory_class_type: str = "ImplicitronModelFactory"

    optimizer_factory: OptimizerFactoryBase
    optimizer_factory_class_type: str = "ImplicitronOptimizerFactory"

    training_loop: TrainingLoopBase
    training_loop_class_type: str = "ImplicitronTrainingLoop"

    seed: int = 42
    detect_anomaly: bool = False
    exp_dir: str = "./data/default_experiment/"

    # Hydra runtime settings baked into the config: run from the current
    # directory and do not create the `.hydra` output subdirectory.
    hydra: dict = field(
        default_factory=lambda: {
            "run": {"dir": "."},
            "output_subdir": None,
            "mode": _RUN,
        }
    )

    def __post_init__(self):
        # Seed all RNGs before components are created so that component
        # initialization itself is reproducible.
        seed_all_random_engines(
            self.seed
        )
        run_auto_creation(self)

    def run(self) -> None:
        """
        Build the dataloaders, model, stats, optimizer and scheduler from the
        configured factories, then hand control to the training loop.
        """
        # Initialize the accelerator unless opted out via PYTORCH3D_NO_ACCELERATE.
        if no_accelerate:
            accelerator = None
            # NOTE(review): assumes a CUDA device is present when accelerate
            # is disabled — confirm for CPU-only runs.
            device = torch.device("cuda:0")
        else:
            # device_placement=False: the model is moved to `device` manually below.
            accelerator = Accelerator(device_placement=False)
            logger.info(accelerator.state)
            device = accelerator.device

        logger.info(f"Running experiment on device: {device}")
        os.makedirs(self.exp_dir, exist_ok=True)

        # Anomaly detection helps debugging NaNs/Infs but slows training.
        if self.detect_anomaly:
            logger.info("Anomaly detection!")
        torch.autograd.set_detect_anomaly(self.detect_anomaly)

        datasets, dataloaders = self.data_source.get_datasets_and_dataloaders()

        # The model factory also handles checkpoint resumption (see its
        # `resume` / `resume_epoch` fields used below).
        model = self.model_factory(
            accelerator=accelerator,
            exp_dir=self.exp_dir,
        )

        # Stats are restored consistently with the model checkpoint so that
        # `start_epoch` continues from where training left off.
        stats = self.training_loop.load_stats(
            log_vars=model.log_vars,
            exp_dir=self.exp_dir,
            resume=self.model_factory.resume,
            resume_epoch=self.model_factory.resume_epoch,
        )
        start_epoch = stats.epoch + 1

        model.to(device)

        optimizer, scheduler = self.optimizer_factory(
            accelerator=accelerator,
            exp_dir=self.exp_dir,
            last_epoch=start_epoch,
            model=model,
            resume=self.model_factory.resume,
            resume_epoch=self.model_factory.resume_epoch,
        )

        # Wrap model/optimizer/loaders for (possibly distributed) execution.
        # Note: the test loader is not wrapped by the accelerator.
        train_loader = dataloaders.train
        val_loader = dataloaders.val
        test_loader = dataloaders.test
        if accelerator is not None:
            (
                model,
                optimizer,
                train_loader,
                val_loader,
            ) = accelerator.prepare(model, optimizer, train_loader, val_loader)

        # Enter the main training loop (forward/backward, eval, checkpoints).
        self.training_loop.run(
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=test_loader,
            train_dataset=datasets.train,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            accelerator=accelerator,
            device=device,
            exp_dir=self.exp_dir,
            stats=stats,
            seed=self.seed,
        )
|
|
|
|
| def _setup_envvars_for_cluster() -> bool: |
| """ |
| Prepares to run on cluster if relevant. |
| Returns whether FAIR cluster in use. |
| """ |
| |
|
|
| try: |
| import submitit |
| except ImportError: |
| return False |
|
|
| try: |
| |
| job_env = submitit.JobEnvironment() |
| except RuntimeError: |
| return False |
|
|
| os.environ["LOCAL_RANK"] = str(job_env.local_rank) |
| os.environ["RANK"] = str(job_env.global_rank) |
| os.environ["WORLD_SIZE"] = str(job_env.num_tasks) |
| os.environ["MASTER_ADDR"] = "localhost" |
| os.environ["MASTER_PORT"] = "42918" |
| logger.info( |
| "Num tasks %s, global_rank %s" |
| % (str(job_env.num_tasks), str(job_env.global_rank)) |
| ) |
|
|
| return True |
|
|
|
|
def dump_cfg(cfg: DictConfig) -> None:
    """
    Save the resolved experiment config to `<exp_dir>/expconfig.yaml`.

    Config nodes unused by the chosen component implementations are stripped
    first. A write failure due to a read-only exp_dir is reported as a
    warning rather than aborting the run.
    """
    remove_unused_components(cfg)
    os.makedirs(cfg.exp_dir, exist_ok=True)
    try:
        OmegaConf.save(
            config=cfg, f=os.path.join(cfg.exp_dir, "expconfig.yaml")
        )
    except PermissionError:
        warnings.warn("Can't dump config due to insufficient permissions!")
|
|
|
|
# Materialize Experiment's pluggable-component fields into a dataclass and
# register it with hydra's ConfigStore as the default config node, so that
# `--config-name default_config` (and configs extending it) resolve.
expand_args_fields(Experiment)
cs = hydra.core.config_store.ConfigStore.instance()
cs.store(name="default_config", node=Experiment)
|
|
|
|
@hydra.main(config_path="./configs/", config_name="default_config")
def experiment(cfg: DictConfig) -> None:
    """
    Hydra entry point: build an Experiment from `cfg`, dump the resolved
    config to the experiment directory, and run training.
    """
    # Make CUDA device numbering follow PCI bus order unless the user
    # already chose an ordering.
    if "CUDA_DEVICE_ORDER" not in os.environ:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

    if not _setup_envvars_for_cluster():
        logger.info("Running locally")

    # Pre-expand the pluggable components referenced by the shipped configs
    # so their fields exist before `cfg` is applied.
    expand_args_fields(ImplicitronModelBase)
    expand_args_fields(AdaptiveRaySampler)
    expand_args_fields(MultiPassEmissionAbsorptionRenderer)
    expand_args_fields(ImplicitronDataSource)

    exp = Experiment(**cfg)
    dump_cfg(cfg)
    exp.run()
|
|
|
|
if __name__ == "__main__":
    # hydra.main parses the CLI/config files and invokes experiment(cfg).
    experiment()
|
|