"""Command-line interface definition for training / evaluation.

Builds a module-level ``parser`` (argparse.ArgumentParser) with all options
for data loading, model configuration, training schedule, hardware, logging
(wandb), and condensation/clustering-specific hyperparameters. Importing
this module has no side effects beyond constructing ``parser``.
"""

import argparse

parser = argparse.ArgumentParser()

# --- model freezing / initialization -------------------------------------
parser.add_argument(
    "--freeze-clustering",
    action="store_true",
    default=False,
    help="Freeze the clustering part of the model",
)

# --- input data ----------------------------------------------------------
parser.add_argument("-c", "--data-config", type=str, help="data config YAML file")
parser.add_argument(
    "-i",
    "--data-train",
    nargs="*",
    default=[],
    help="training files; supported syntax:"
    " (a) plain list, `--data-train /path/to/a/* /path/to/b/*`;"
    " (b) (named) groups [Recommended], `--data-train a:/path/to/a/* b:/path/to/b/*`,"
    " the file splitting (for each dataloader worker) will be performed per group,"
    " and then mixed together, to ensure a uniform mixing from all groups for each worker.",
)
parser.add_argument(
    "-l",
    "--data-val",
    nargs="*",
    default=[],
    help="validation files; when not set, will use training files and split by `--train-val-split`",
)
parser.add_argument(
    "-t",
    "--data-test",
    nargs="*",
    default=[],
    help="testing files; supported syntax:"
    " (a) plain list, `--data-test /path/to/a/* /path/to/b/*`;"
    " (b) keyword-based, `--data-test a:/path/to/a/* b:/path/to/b/*`, will produce output_a, output_b;"
    " (c) split output per N input files, `--data-test a%10:/path/to/a/*`, will split per 10 input files",
)

# --- data sampling / fetching strategy -----------------------------------
parser.add_argument(
    "--data-fraction",
    type=float,
    default=1,
    help="fraction of events to load from each file; for training, the events are randomly selected for each epoch",
)
parser.add_argument(
    "--file-fraction",
    type=float,
    default=1,
    help="fraction of files to load; for training, the files are randomly selected for each epoch",
)
parser.add_argument(
    "--fetch-by-files",
    action="store_true",
    default=False,
    help="When enabled, will load all events from a small number (set by ``--fetch-step``) of files for each data fetching. "
    "Otherwise (default), load a small fraction of events from all files each time, which helps reduce variations in the sample composition.",
)
parser.add_argument(
    "--fetch-step",
    type=float,
    default=0.01,
    # NOTE: interpretation of this value flips depending on --fetch-by-files.
    help="fraction of events to load each time from every file (when ``--fetch-by-files`` is disabled); "
    "Or: number of files to load each time (when ``--fetch-by-files`` is enabled). Shuffling & sampling is done within these events, so set a large enough value.",
)
parser.add_argument(
    "--train-val-split",
    type=float,
    default=0.8,
    help="training/validation split fraction",
)

# --- model / checkpoint paths --------------------------------------------
parser.add_argument(
    "-n",
    "--network-config",
    type=str,
    help="network architecture configuration file; the path must be relative to the current dir",
)
parser.add_argument(
    "-m",
    "--model-prefix",
    type=str,
    # NOTE(review): "networkss" looks like a typo in the default path, but it
    # is a behavioral default (existing checkpoints may live there) — confirm
    # before renaming.
    default="models/{auto}/networkss",
    help="path to save or load the model; for training, this will be used as a prefix, so model snapshots "
    "will be saved to `{model_prefix}_epoch-%d_state.pt` after each epoch, and the one with the best "
    "validation metric to `{model_prefix}_best_epoch_state.pt`; for testing, this should be the full path "
    "including the suffix, otherwise the one with the best validation metric will be used; "
    "for training, `{auto}` can be used as part of the path to auto-generate a name, "
    "based on the timestamp and network configuration",
)
parser.add_argument(
    "--load-model-weights",
    type=str,
    default=None,
    help="initialize model with pre-trained weights",
)
parser.add_argument(
    "--load-model-weights-clustering",
    type=str,
    default=None,
    help="initialize model with pre-trained weights for clustering part of the model",
)

# --- training schedule ---------------------------------------------------
parser.add_argument("--start-lr", type=float, default=5e-3, help="start learning rate")
parser.add_argument("--num-epochs", type=int, default=20, help="number of epochs")
parser.add_argument(
    "--steps-per-epoch",
    type=int,
    default=None,
    help="number of steps (iterations) per epochs; "
    "if neither of `--steps-per-epoch` or `--samples-per-epoch` is set, each epoch will run over all loaded samples",
)
parser.add_argument(
    "--steps-per-epoch-val",
    type=int,
    default=None,
    help="number of steps (iterations) per epochs for validation; "
    "if neither of `--steps-per-epoch-val` or `--samples-per-epoch-val` is set, each epoch will run over all loaded samples",
)
parser.add_argument(
    "--samples-per-epoch",
    type=int,
    default=None,
    help="number of samples per epochs; "
    "if neither of `--steps-per-epoch` or `--samples-per-epoch` is set, each epoch will run over all loaded samples",
)
parser.add_argument(
    "--samples-per-epoch-val",
    type=int,
    default=None,
    help="number of samples per epochs for validation; "
    "if neither of `--steps-per-epoch-val` or `--samples-per-epoch-val` is set, each epoch will run over all loaded samples",
)
parser.add_argument("--batch-size", type=int, default=128, help="batch size")

# --- hardware / dataloader resources -------------------------------------
parser.add_argument(
    "--gpus",
    type=str,
    default="0",
    help='device for the training/testing; to use CPU, set to empty string (""); to use multiple gpu, set it as a comma separated list, e.g., `1,2,3,4`',
)
parser.add_argument(
    "--num-workers",
    type=int,
    default=1,
    help="number of threads to load the dataset; memory consumption and disk access load increases (~linearly) with this number",
)
parser.add_argument(
    "--prefetch-factor",
    type=int,
    default=1,
    help="How many items to prefetch in the dataloaders. "
    "Should be about the same order of magnitude as batch size for optimal performance.",
)

# --- run mode ------------------------------------------------------------
parser.add_argument(
    "--predict",
    action="store_true",
    default=False,
    help="run prediction instead of training",
)

# --- wandb logging -------------------------------------------------------
parser.add_argument(
    "--log-wandb", action="store_true", default=False, help="use wandb for logging"
)
parser.add_argument(
    "--wandb-displayname",
    type=str,
    help="give display name to wandb run, if not entered a random one is generated",
)
parser.add_argument(
    "--wandb-projectname", type=str, help="project where the run is stored inside wandb"
)
parser.add_argument(
    "--wandb-entity", type=str, help="username or team name where you are sending runs"
)

# --- condensation / clustering hyperparameters ---------------------------
parser.add_argument(
    "--qmin", type=float, default=0.1, help="define qmin for condensation"
)
parser.add_argument(
    # NOTE: underscore (not dash) spelling kept for backward compatibility.
    "--frac_cluster_loss",
    type=float,
    default=0,
    help="Fraction of total pairs to use for the clustering loss",
)
parser.add_argument(
    "--use-average-cc-pos",
    default=0.0,
    type=float,
    help="push the alpha to the mean of the coordinates in the object by this value",
)
parser.add_argument(
    "--correction",
    action="store_true",
    default=False,
    help="Train correction only",
)
parser.add_argument(
    "--use-gt-clusters",
    default=False,
    action="store_true",
    help="If toggled, uses ground-truth clusters instead of the predicted ones by the model. We can use this to simulate 'ideal' clustering.",
)

# --- evaluation output / misc --------------------------------------------
parser.add_argument(
    "--name-output",
    type=str,
    help="name of the dataframe stored during eval",
)
parser.add_argument(
    "--train-batches",
    default=100,
    type=int,
    help="number of train batches",
)
parser.add_argument(
    "--pandora",
    default=False,
    action="store_true",
    help="using pandora information",
)