# HitPF_demo/src/utils/parser_args.py
# Synced from GitHub by github-actions[bot] (commits f6dbbfb, cc0720f).
import argparse

# Command-line interface shared by the training / evaluation entry points.
parser = argparse.ArgumentParser()

# -- dataset selection -------------------------------------------------------
parser.add_argument(
    "--freeze-clustering",
    default=False,
    action="store_true",
    help="Freeze the clustering part of the model",
)
parser.add_argument("-c", "--data-config", help="data config YAML file", type=str)
parser.add_argument(
    "-i",
    "--data-train",
    default=[],
    nargs="*",
    help=(
        "training files; supported syntax:"
        " (a) plain list, `--data-train /path/to/a/* /path/to/b/*`;"
        " (b) (named) groups [Recommended], `--data-train a:/path/to/a/* b:/path/to/b/*`,"
        " the file splitting (for each dataloader worker) will be performed per group,"
        " and then mixed together, to ensure a uniform mixing from all groups for each worker."
    ),
)
parser.add_argument(
    "-l",
    "--data-val",
    default=[],
    nargs="*",
    help="validation files; when not set, will use training files and split by `--train-val-split`",
)
parser.add_argument(
    "-t",
    "--data-test",
    default=[],
    nargs="*",
    help=(
        "testing files; supported syntax:"
        " (a) plain list, `--data-test /path/to/a/* /path/to/b/*`;"
        " (b) keyword-based, `--data-test a:/path/to/a/* b:/path/to/b/*`, will produce output_a, output_b;"
        " (c) split output per N input files, `--data-test a%10:/path/to/a/*`, will split per 10 input files"
    ),
)
parser.add_argument(
    "--data-fraction",
    default=1,
    type=float,
    help="fraction of events to load from each file; for training, the events are randomly selected for each epoch",
)
# -- data-loading behaviour --------------------------------------------------
parser.add_argument(
    "--file-fraction",
    default=1,
    type=float,
    help="fraction of files to load; for training, the files are randomly selected for each epoch",
)
parser.add_argument(
    "--fetch-by-files",
    default=False,
    action="store_true",
    help=(
        "When enabled, will load all events from a small number (set by ``--fetch-step``) of files for each data fetching. "
        "Otherwise (default), load a small fraction of events from all files each time, which helps reduce variations in the sample composition."
    ),
)
parser.add_argument(
    "--fetch-step",
    default=0.01,
    type=float,
    help=(
        "fraction of events to load each time from every file (when ``--fetch-by-files`` is disabled); "
        "Or: number of files to load each time (when ``--fetch-by-files`` is enabled). Shuffling & sampling is done within these events, so set a large enough value."
    ),
)
parser.add_argument(
    "--train-val-split",
    default=0.8,
    type=float,
    help="training/validation split fraction",
)

# -- network configuration / checkpoints -------------------------------------
parser.add_argument(
    "-n",
    "--network-config",
    type=str,
    help="network architecture configuration file; the path must be relative to the current dir",
)
# NOTE(review): the default below ends in "networkss" -- looks like a typo for
# "networks", but it is a runtime default path, so it is kept unchanged; confirm
# against the checkpoints produced by existing runs before renaming.
parser.add_argument(
    "-m",
    "--model-prefix",
    default="models/{auto}/networkss",
    type=str,
    help=(
        "path to save or load the model; for training, this will be used as a prefix, so model snapshots "
        "will saved to `{model_prefix}_epoch-%d_state.pt` after each epoch, and the one with the best "
        "validation metric to `{model_prefix}_best_epoch_state.pt`; for testing, this should be the full path "
        "including the suffix, otherwise the one with the best validation metric will be used; "
        "for training, `{auto}` can be used as part of the path to auto-generate a name, "
        "based on the timestamp and network configuration"
    ),
)
parser.add_argument(
    "--load-model-weights",
    default=None,
    type=str,
    help="initialize model with pre-trained weights",
)
parser.add_argument(
    "--load-model-weights-clustering",
    default=None,
    type=str,
    help="initialize model with pre-trained weights for clustering part of the model",
)
# -- optimisation and epoch bookkeeping --------------------------------------
parser.add_argument("--start-lr", default=5e-3, type=float, help="start learning rate")
parser.add_argument("--num-epochs", default=20, type=int, help="number of epochs")
parser.add_argument(
    "--steps-per-epoch",
    default=None,
    type=int,
    help=(
        "number of steps (iterations) per epochs; "
        "if neither of `--steps-per-epoch` or `--samples-per-epoch` is set, each epoch will run over all loaded samples"
    ),
)
parser.add_argument(
    "--steps-per-epoch-val",
    default=None,
    type=int,
    help=(
        "number of steps (iterations) per epochs for validation; "
        "if neither of `--steps-per-epoch-val` or `--samples-per-epoch-val` is set, each epoch will run over all loaded samples"
    ),
)
parser.add_argument(
    "--samples-per-epoch",
    default=None,
    type=int,
    help=(
        "number of samples per epochs; "
        "if neither of `--steps-per-epoch` or `--samples-per-epoch` is set, each epoch will run over all loaded samples"
    ),
)
parser.add_argument(
    "--samples-per-epoch-val",
    default=None,
    type=int,
    help=(
        "number of samples per epochs for validation; "
        "if neither of `--steps-per-epoch-val` or `--samples-per-epoch-val` is set, each epoch will run over all loaded samples"
    ),
)
parser.add_argument("--batch-size", default=128, type=int, help="batch size")

# -- hardware / dataloader resources -----------------------------------------
parser.add_argument(
    "--gpus",
    default="0",
    type=str,
    help='device for the training/testing; to use CPU, set to empty string (""); to use multiple gpu, set it as a comma separated list, e.g., `1,2,3,4`',
)
parser.add_argument(
    "--num-workers",
    default=1,
    type=int,
    help="number of threads to load the dataset; memory consumption and disk access load increases (~linearly) with this numbers",
)
parser.add_argument(
    "--prefetch-factor",
    default=1,
    type=int,
    help="How many items to prefetch in the dataloaders. Should be about the same order of magnitude as batch size for optimal performance.",
)
# -- run mode and experiment tracking (wandb) --------------------------------
parser.add_argument(
    "--predict",
    action="store_true",
    default=False,
    help="run prediction instead of training",
)
parser.add_argument(
    # fixed help-text typo: "loging" -> "logging"
    "--log-wandb", action="store_true", default=False, help="use wandb for logging"
)
parser.add_argument(
    "--wandb-displayname",
    type=str,
    help="give display name to wandb run, if not entered a random one is generated",
)
parser.add_argument(
    "--wandb-projectname", type=str, help="project where the run is stored inside wandb"
)
parser.add_argument(
    "--wandb-entity", type=str, help="username or team name where you are sending runs"
)
parser.add_argument(
    "--qmin", type=float, default=0.1, help="define qmin for condensation"
)
# -- condensation / clustering and evaluation options ------------------------
# NOTE(review): "--frac_cluster_loss" uses an underscore while every other flag
# uses hyphens; kept as-is so existing command lines keep working.
parser.add_argument(
    "--frac_cluster_loss",
    default=0,
    type=float,
    help="Fraction of total pairs to use for the clustering loss",
)
parser.add_argument(
    "--use-average-cc-pos",
    type=float,
    default=0.0,
    help="push the alpha to the mean of the coordinates in the object by this value",
)
parser.add_argument(
    "--correction", default=False, action="store_true", help="Train correction only"
)
parser.add_argument(
    "--use-gt-clusters",
    action="store_true",
    default=False,
    help="If toggled, uses ground-truth clusters instead of the predicted ones by the model. We can use this to simulate 'ideal' clustering.",
)
parser.add_argument(
    "--name-output", type=str, help="name of the dataframe stored during eval"
)
parser.add_argument(
    "--train-batches", type=int, default=100, help="number of train batches"
)
parser.add_argument(
    "--pandora", default=False, action="store_true", help="using pandora information"
)