Upload folder using huggingface_hub

c6535db verified 20 days ago

9.44 kB

	# @package _global_
	# Defaults list: `_self_` is the only entry.
	defaults:
	  - _self_

	# This config is the base configuration for all evaluations. Amongst other things, it defines:
	# - the model
	# - the image transforms
	# - the post processors
	# - cluster configuration (only relevant for slurm-based evals, ignored otherwise)
	#
	# Most of the parameters should be kept as-is. The main modifications you may want to make are:
	# - the cluster configuration, to adjust partitions/qos to your system
	# - the flag gather_pred_via_filesys if your RAM is tight
	# - num_val_workers if your number of cores is small (should be roughly number of cores / number of gpus)
	# - the paths below


	# ============================================================================
	# Paths Configuration (Change this to your own paths)
	# ============================================================================
	# User-specific locations. Every entry is nested under `paths` so that
	# interpolations such as ${paths.bpe_path} and ${paths.checkpoint_path} resolve.
	paths:
	  # If you leave the checkpoint path as null, the model will be downloaded
	  # from Hugging Face. Otherwise provide a path.
	  checkpoint_path: null
	  # The experiments will be subfolders of this directory.
	  base_experiment_log_dir: <YOUR EXPERIMENET LOG_DIR>

	  # Base path to the annotation folder for gold (refer to the readmes on how to download).
	  base_annotation_path: <YOUR_GOLD_GT_DIR>

	  # Base path to the annotation folder for silver (refer to the readmes on how to download).
	  base_annotation_path_silver: <YOUR_SILVER_GT_DIR>

	  # Path to the metaclip images, used for SA-Co gold (refer to the readme for
	  # instructions). Can be null if you don't intend to evaluate on this dataset.
	  metaclip_img_path: <YOUR_METACLIP_IMG_DIR>

	  # Path to the sa1b images, used for SA-Co gold (refer to the readme for
	  # instructions). Can be null if you don't intend to evaluate on this dataset.
	  sa1b_img_path: <YOUR_SA1B_IMG_DIR>

	  # Path to the SA-Co/silver images.
	  silver_img_path: <YOUR_SILVER_IMG_DIR>

	  # This should be under assets/bpe_simple_vocab_16e6.txt.gz
	  bpe_path: <BPE_PATH>


	# ============================================================================
	# Different helper parameters and functions
	# ============================================================================
	# Shared helper values. Other sections read them through ${scratch.<key>}
	# interpolations, so every entry must be nested under `scratch`.
	scratch:

	  use_presence_eval: true

	  # Validation image pipeline: resize, convert to tensor, normalize.
	  # NOTE(review): the nesting below was reconstructed from a flattened copy;
	  # ToTensorAPI and NormalizeAPI are assumed to be members of
	  # ComposeAPI.transforms -- confirm against the upstream config.
	  base_val_transform:
	    - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
	      transforms:
	        ######## transforms for validation (begin) ########
	        - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
	          sizes: ${scratch.resolution} # originally `resolution: 1024`
	          max_size:
	            _target_: sam3.train.transforms.basic.get_random_resize_max_size
	            size: ${scratch.resolution} # originally `resolution: 1024`
	          square: true
	          consistent_transform: false
	        ######## transforms for validation (end) ########
	        - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
	        - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
	          mean: ${scratch.val_norm_mean}
	          std: ${scratch.val_norm_std}

	  loss: null

	  # Model parameters
	  d_model: 256
	  input_box_embedding_dim: ${add:${scratch.d_model},2}

	  # Box processing
	  # Variant that keeps the original ids and original box sizes.
	  original_box_postprocessor:
	    _target_: sam3.eval.postprocessors.PostProcessImage
	    max_dets_per_img: -1 # infinite detections
	    use_original_ids: true
	    use_original_sizes_box: true
	    use_presence: ${scratch.use_presence_eval}

	  box_postprocessor:
	    _target_: sam3.eval.postprocessors.PostProcessImage
	    max_dets_per_img: -1 # infinite detections
	    use_original_ids: false
	    use_original_sizes_box: false
	    use_presence: ${scratch.use_presence_eval}

	  # Same as box_postprocessor, plus a detection threshold.
	  box_postprocessor_thresholded:
	    _target_: sam3.eval.postprocessors.PostProcessImage
	    max_dets_per_img: -1 # infinite detections
	    use_original_ids: false
	    use_original_sizes_box: false
	    detection_threshold: 0.3
	    use_presence: ${scratch.use_presence_eval}

	  # Thresholded variant for segmentation output (iou_type "segm", RLE masks).
	  mask_postprocessor_thresholded:
	    _target_: sam3.eval.postprocessors.PostProcessImage
	    max_dets_per_img: -1 # infinite detections
	    iou_type: "segm"
	    use_original_ids: false
	    use_original_sizes_box: false
	    use_original_sizes_mask: true
	    convert_mask_to_rle: true
	    detection_threshold: 0.3
	    use_presence: ${scratch.use_presence_eval}

	  # Image processing parameters
	  resolution: 1008
	  max_ann_per_img: 200

	  # Normalization parameters
	  train_norm_mean: [0.5, 0.5, 0.5]
	  train_norm_std: [0.5, 0.5, 0.5]
	  val_norm_mean: [0.5, 0.5, 0.5]
	  val_norm_std: [0.5, 0.5, 0.5]

	  # Training parameters
	  train_batch_size: 1
	  val_batch_size: 1
	  num_train_workers: 0
	  num_val_workers: 10 # change this depending on the number of cpu cores available
	  max_data_epochs: 20
	  target_epoch_size: 1500
	  hybrid_repeats: 1
	  context_length: 2

	  # All reduce - this controls how the predictions are sent back to node 0.
	  # If you have a lot of RAM, CPU gather is faster. Otherwise, we provide a
	  # fallback through the filesystem (e.g. NFS).
	  # Switch to true if you get CPU OOMs during gather.
	  gather_pred_via_filesys: false

	  # Learning rate and scheduler parameters (unused for eval)
	  lr_scale: 0.1
	  lr_transformer: ${times:8e-4,${scratch.lr_scale}}
	  lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
	  lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
	  lrd_vision_backbone: 0.9 # (lower for in-domain and higher for ood)
	  wd: 0.1
	  scheduler_timescale: 20
	  scheduler_warmup: 20
	  scheduler_cooldown: 20


	# ============================================================================
	# Trainer Configuration
	# ============================================================================

	# Trainer instantiated in validation mode (`mode: val`). Checkpoint saving is
	# skipped and the losses are DummyLoss placeholders.
	trainer:
	  _target_: sam3.train.trainer.Trainer
	  skip_saving_ckpts: true
	  empty_gpu_mem_cache_after_eval: true
	  skip_first_val: true
	  max_epochs: ${scratch.max_data_epochs}
	  accelerator: cuda
	  seed_value: 123
	  val_epoch_freq: 10
	  mode: val

	  distributed:
	    backend: nccl
	    find_unused_parameters: true
	    gradient_as_bucket_view: true

	  loss:
	    all:
	      _target_: sam3.train.loss.sam3_loss.DummyLoss
	    default:
	      _target_: sam3.train.loss.sam3_loss.DummyLoss

	  # Datasets are left null in this base config.
	  data:
	    train: null
	    val: null

	  model:
	    _target_: sam3.model_builder.build_sam3_image_model
	    bpe_path: ${paths.bpe_path}
	    # `cpu` is the valid torch device name.
	    # NOTE(review): presumably the trainer moves the model to `accelerator`
	    # afterwards -- confirm.
	    device: cpu
	    eval_mode: true
	    enable_segmentation: true # Warning: Enable this if using segmentation.
	    checkpoint_path: ${paths.checkpoint_path}

	  meters:
	    val: null

	  # Optimizer settings. The learning-rate and scheduler values they read from
	  # `scratch` are marked there as unused for eval.
	  optim:
	    amp:
	      enabled: true
	      amp_dtype: bfloat16

	    optimizer:
	      _target_: torch.optim.AdamW

	    gradient_clip:
	      _target_: sam3.train.optim.optimizer.GradientClipper
	      max_norm: 0.1
	      norm_type: 2

	    param_group_modifiers:
	      - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
	        _partial_: true
	        layer_decay_value: ${scratch.lrd_vision_backbone}
	        apply_to: 'backbone.vision_backbone.trunk'
	        overrides:
	          # NOTE(review): wildcards restored. Under glob matching a bare
	          # `pos_embed` would only match a parameter with exactly that name
	          # -- confirm against the optimizer's pattern matcher.
	          - pattern: '*pos_embed*'
	            value: 1.0

	    options:
	      lr:
	        - scheduler: # transformer and class_embed
	            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
	            base_lr: ${scratch.lr_transformer}
	            timescale: ${scratch.scheduler_timescale}
	            warmup_steps: ${scratch.scheduler_warmup}
	            cooldown_steps: ${scratch.scheduler_cooldown}
	        - scheduler:
	            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
	            base_lr: ${scratch.lr_vision_backbone}
	            timescale: ${scratch.scheduler_timescale}
	            warmup_steps: ${scratch.scheduler_warmup}
	            cooldown_steps: ${scratch.scheduler_cooldown}
	          param_names:
	            - 'backbone.vision_backbone.*'
	        - scheduler:
	            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
	            base_lr: ${scratch.lr_language_backbone}
	            timescale: ${scratch.scheduler_timescale}
	            warmup_steps: ${scratch.scheduler_warmup}
	            cooldown_steps: ${scratch.scheduler_cooldown}
	          param_names:
	            - 'backbone.language_backbone.*'

	      weight_decay:
	        - scheduler:
	            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
	            value: ${scratch.wd}
	        # Zero weight decay for bias parameters and LayerNorm modules.
	        - scheduler:
	            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
	            value: 0.0
	          param_names:
	            # NOTE(review): wildcards restored (was a bare `bias`) -- confirm
	            # against the optimizer's pattern matcher.
	            - '*bias*'
	          module_cls_names: ['torch.nn.LayerNorm']

	  checkpoint:
	    save_dir: ${launcher.experiment_log_dir}/checkpoints
	    save_freq: 0 # 0 only last checkpoint is saved.

	  logging:
	    tensorboard_writer:
	      _target_: sam3.train.utils.logger.make_tensorboard_logger
	      log_dir: ${launcher.experiment_log_dir}/tensorboard
	      flush_secs: 120
	      should_log: true
	    wandb_writer: null
	    log_dir: ${launcher.experiment_log_dir}/logs/
	    log_freq: 10

	# ============================================================================
	# Launcher and Submitit Configuration
	# ============================================================================

	# Job shape and log location. `trainer.checkpoint` and `trainer.logging`
	# read ${launcher.experiment_log_dir}, so these keys must be nested here.
	launcher:
	  num_nodes: 4
	  gpus_per_node: 8
	  # NOTE(review): `paths.experiment_log_dir` is not defined in this file's
	  # `paths` section (only `base_experiment_log_dir` is); presumably it is
	  # supplied by the configs that extend this base -- confirm.
	  experiment_log_dir: ${paths.experiment_log_dir}
	  multiprocessing_context: forkserver


	# Cluster submission settings (only relevant for slurm-based evals).
	# Adjust partition/qos/account to your system.
	submitit:
	  account: null # Add your SLURM account if use_cluster is true
	  partition: null
	  qos: null # Add your QoS if use_cluster is true
	  timeout_hour: 72
	  use_cluster: true
	  cpus_per_task: 10
	  port_range: [10000, 65000]
	  constraint: null