DepthAnything-Video

Running

App Files Files Community

DepthAnything-Video / torchhub /facebookresearch_dinov2_main /dinov2 /eval /log_regression.py

JohanDL

adding checkpoints

bbd0fe5 almost 2 years ago

raw

history blame contribute delete

15.1 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	import argparse
	import gc
	import logging
	import sys
	import time
	from typing import List, Optional

	from cuml.linear_model import LogisticRegression
	import torch
	import torch.backends.cudnn as cudnn
	import torch.distributed
	from torch import nn
	from torch.utils.data import TensorDataset
	from torchmetrics import MetricTracker

	from dinov2.data import make_dataset
	from dinov2.data.transforms import make_classification_eval_transform
	from dinov2.distributed import get_global_rank, get_global_size
	from dinov2.eval.metrics import MetricType, build_metric
	from dinov2.eval.setup import get_args_parser as get_setup_args_parser
	from dinov2.eval.setup import setup_and_build_model
	from dinov2.eval.utils import evaluate, extract_features
	from dinov2.utils.dtype import as_torch_dtype


	logger = logging.getLogger("dinov2")

	DEFAULT_MAX_ITER = 1_000
	C_POWER_RANGE = torch.linspace(-6, 5, 45)
	_CPU_DEVICE = torch.device("cpu")


	def get_args_parser(
	description: Optional[str] = None,
	parents: Optional[List[argparse.ArgumentParser]] = None,
	add_help: bool = True,
	):
	parents = parents or []
	setup_args_parser = get_setup_args_parser(parents=parents, add_help=False)
	parents = [setup_args_parser]
	parser = argparse.ArgumentParser(
	description=description,
	parents=parents,
	add_help=add_help,
	)
	parser.add_argument(
	"--train-dataset",
	dest="train_dataset_str",
	type=str,
	help="Training dataset",
	)
	parser.add_argument(
	"--val-dataset",
	dest="val_dataset_str",
	type=str,
	help="Validation dataset",
	)
	parser.add_argument(
	"--finetune-dataset-str",
	dest="finetune_dataset_str",
	type=str,
	help="Fine-tuning dataset",
	)
	parser.add_argument(
	"--finetune-on-val",
	action="store_true",
	help="If there is no finetune dataset, whether to choose the "
	"hyperparameters on the val set instead of 10%% of the train dataset",
	)
	parser.add_argument(
	"--metric-type",
	type=MetricType,
	choices=list(MetricType),
	help="Metric type",
	)
	parser.add_argument(
	"--train-features-device",
	type=str,
	help="Device to gather train features (cpu, cuda, cuda:0, etc.), default: %(default)s",
	)
	parser.add_argument(
	"--train-dtype",
	type=str,
	help="Data type to convert the train features to (default: %(default)s)",
	)
	parser.add_argument(
	"--max-train-iters",
	type=int,
	help="Maximum number of train iterations (default: %(default)s)",
	)
	parser.set_defaults(
	train_dataset_str="ImageNet:split=TRAIN",
	val_dataset_str="ImageNet:split=VAL",
	finetune_dataset_str=None,
	metric_type=MetricType.MEAN_ACCURACY,
	train_features_device="cpu",
	train_dtype="float64",
	max_train_iters=DEFAULT_MAX_ITER,
	finetune_on_val=False,
	)
	return parser


	class LogRegModule(nn.Module):
	def __init__(
	self,
	C,
	max_iter=DEFAULT_MAX_ITER,
	dtype=torch.float64,
	device=_CPU_DEVICE,
	):
	super().__init__()
	self.dtype = dtype
	self.device = device
	self.estimator = LogisticRegression(
	penalty="l2",
	C=C,
	max_iter=max_iter,
	output_type="numpy",
	tol=1e-12,
	linesearch_max_iter=50,
	)

	def forward(self, samples, targets):
	samples_device = samples.device
	samples = samples.to(dtype=self.dtype, device=self.device)
	if self.device == _CPU_DEVICE:
	samples = samples.numpy()
	probas = self.estimator.predict_proba(samples)
	return {"preds": torch.from_numpy(probas).to(samples_device), "target": targets}

	def fit(self, train_features, train_labels):
	train_features = train_features.to(dtype=self.dtype, device=self.device)
	train_labels = train_labels.to(dtype=self.dtype, device=self.device)
	if self.device == _CPU_DEVICE:
	# both cuML and sklearn only work with numpy arrays on CPU
	train_features = train_features.numpy()
	train_labels = train_labels.numpy()
	self.estimator.fit(train_features, train_labels)


	def evaluate_model(*, logreg_model, logreg_metric, test_data_loader, device):
	postprocessors = {"metrics": logreg_model}
	metrics = {"metrics": logreg_metric}
	return evaluate(nn.Identity(), test_data_loader, postprocessors, metrics, device)


	def train_for_C(*, C, max_iter, train_features, train_labels, dtype=torch.float64, device=_CPU_DEVICE):
	logreg_model = LogRegModule(C, max_iter=max_iter, dtype=dtype, device=device)
	logreg_model.fit(train_features, train_labels)
	return logreg_model


	def train_and_evaluate(
	*,
	C,
	max_iter,
	train_features,
	train_labels,
	logreg_metric,
	test_data_loader,
	train_dtype=torch.float64,
	train_features_device,
	eval_device,
	):
	logreg_model = train_for_C(
	C=C,
	max_iter=max_iter,
	train_features=train_features,
	train_labels=train_labels,
	dtype=train_dtype,
	device=train_features_device,
	)
	return evaluate_model(
	logreg_model=logreg_model,
	logreg_metric=logreg_metric,
	test_data_loader=test_data_loader,
	device=eval_device,
	)


	def sweep_C_values(
	*,
	train_features,
	train_labels,
	test_data_loader,
	metric_type,
	num_classes,
	train_dtype=torch.float64,
	train_features_device=_CPU_DEVICE,
	max_train_iters=DEFAULT_MAX_ITER,
	):
	if metric_type == MetricType.PER_CLASS_ACCURACY:
	# If we want to output per-class accuracy, we select the hyperparameters with mean per class
	metric_type = MetricType.MEAN_PER_CLASS_ACCURACY
	logreg_metric = build_metric(metric_type, num_classes=num_classes)
	metric_tracker = MetricTracker(logreg_metric, maximize=True)
	ALL_C = 10**C_POWER_RANGE
	logreg_models = {}

	train_features = train_features.to(dtype=train_dtype, device=train_features_device)
	train_labels = train_labels.to(device=train_features_device)

	for i in range(get_global_rank(), len(ALL_C), get_global_size()):
	C = ALL_C[i].item()
	logger.info(
	f"Training for C = {C:.5f}, dtype={train_dtype}, "
	f"features: {train_features.shape}, {train_features.dtype}, "
	f"labels: {train_labels.shape}, {train_labels.dtype}"
	)
	logreg_models[C] = train_for_C(
	C=C,
	max_iter=max_train_iters,
	train_features=train_features,
	train_labels=train_labels,
	dtype=train_dtype,
	device=train_features_device,
	)

	gather_list = [None for _ in range(get_global_size())]
	torch.distributed.all_gather_object(gather_list, logreg_models)

	logreg_models_gathered = {}
	for logreg_dict in gather_list:
	logreg_models_gathered.update(logreg_dict)

	for i in range(len(ALL_C)):
	metric_tracker.increment()
	C = ALL_C[i].item()
	evals = evaluate_model(
	logreg_model=logreg_models_gathered[C],
	logreg_metric=metric_tracker,
	test_data_loader=test_data_loader,
	device=torch.cuda.current_device(),
	)
	logger.info(f"Trained for C = {C:.5f}, accuracies = {evals}")

	best_stats, which_epoch = metric_tracker.best_metric(return_step=True)
	best_stats_100 = {k: 100.0 * v for k, v in best_stats.items()}
	if which_epoch["top-1"] == i:
	best_C = C
	logger.info(f"Sweep best {best_stats_100}, best C = {best_C:.6f}")

	return best_stats, best_C


	def eval_log_regression(
	*,
	model,
	train_dataset,
	val_dataset,
	finetune_dataset,
	metric_type,
	batch_size,
	num_workers,
	finetune_on_val=False,
	train_dtype=torch.float64,
	train_features_device=_CPU_DEVICE,
	max_train_iters=DEFAULT_MAX_ITER,
	):
	"""
	Implements the "standard" process for log regression evaluation:
	The value of C is chosen by training on train_dataset and evaluating on
	finetune_dataset. Then, the final model is trained on a concatenation of
	train_dataset and finetune_dataset, and is evaluated on val_dataset.
	If there is no finetune_dataset, the value of C is the one that yields
	the best results on a random 10% subset of the train dataset
	"""

	start = time.time()

	train_features, train_labels = extract_features(
	model, train_dataset, batch_size, num_workers, gather_on_cpu=(train_features_device == _CPU_DEVICE)
	)
	val_features, val_labels = extract_features(
	model, val_dataset, batch_size, num_workers, gather_on_cpu=(train_features_device == _CPU_DEVICE)
	)
	val_data_loader = torch.utils.data.DataLoader(
	TensorDataset(val_features, val_labels),
	batch_size=batch_size,
	drop_last=False,
	num_workers=0,
	persistent_workers=False,
	)

	if finetune_dataset is None and finetune_on_val:
	logger.info("Choosing hyperparameters on the val dataset")
	finetune_features, finetune_labels = val_features, val_labels
	elif finetune_dataset is None and not finetune_on_val:
	logger.info("Choosing hyperparameters on 10% of the train dataset")
	torch.manual_seed(0)
	indices = torch.randperm(len(train_features), device=train_features.device)
	finetune_index = indices[: len(train_features) // 10]
	train_index = indices[len(train_features) // 10 :]
	finetune_features, finetune_labels = train_features[finetune_index], train_labels[finetune_index]
	train_features, train_labels = train_features[train_index], train_labels[train_index]
	else:
	logger.info("Choosing hyperparameters on the finetune dataset")
	finetune_features, finetune_labels = extract_features(
	model, finetune_dataset, batch_size, num_workers, gather_on_cpu=(train_features_device == _CPU_DEVICE)
	)
	# release the model - free GPU memory
	del model
	gc.collect()
	torch.cuda.empty_cache()
	finetune_data_loader = torch.utils.data.DataLoader(
	TensorDataset(finetune_features, finetune_labels),
	batch_size=batch_size,
	drop_last=False,
	)

	if len(train_labels.shape) > 1:
	num_classes = train_labels.shape[1]
	else:
	num_classes = train_labels.max() + 1

	logger.info("Using cuML for logistic regression")

	best_stats, best_C = sweep_C_values(
	train_features=train_features,
	train_labels=train_labels,
	test_data_loader=finetune_data_loader,
	metric_type=metric_type,
	num_classes=num_classes,
	train_dtype=train_dtype,
	train_features_device=train_features_device,
	max_train_iters=max_train_iters,
	)

	if not finetune_on_val:
	logger.info("Best parameter found, concatenating features")
	train_features = torch.cat((train_features, finetune_features))
	train_labels = torch.cat((train_labels, finetune_labels))

	logger.info("Training final model")
	logreg_metric = build_metric(metric_type, num_classes=num_classes)
	evals = train_and_evaluate(
	C=best_C,
	max_iter=max_train_iters,
	train_features=train_features,
	train_labels=train_labels,
	logreg_metric=logreg_metric.clone(),
	test_data_loader=val_data_loader,
	eval_device=torch.cuda.current_device(),
	train_dtype=train_dtype,
	train_features_device=train_features_device,
	)

	best_stats = evals[1]["metrics"]

	best_stats["best_C"] = best_C

	logger.info(f"Log regression evaluation done in {int(time.time() - start)}s")
	return best_stats


	def eval_log_regression_with_model(
	model,
	train_dataset_str="ImageNet:split=TRAIN",
	val_dataset_str="ImageNet:split=VAL",
	finetune_dataset_str=None,
	autocast_dtype=torch.float,
	finetune_on_val=False,
	metric_type=MetricType.MEAN_ACCURACY,
	train_dtype=torch.float64,
	train_features_device=_CPU_DEVICE,
	max_train_iters=DEFAULT_MAX_ITER,
	):
	cudnn.benchmark = True

	transform = make_classification_eval_transform(resize_size=224)
	target_transform = None

	train_dataset = make_dataset(dataset_str=train_dataset_str, transform=transform, target_transform=target_transform)
	val_dataset = make_dataset(dataset_str=val_dataset_str, transform=transform, target_transform=target_transform)
	if finetune_dataset_str is not None:
	finetune_dataset = make_dataset(
	dataset_str=finetune_dataset_str, transform=transform, target_transform=target_transform
	)
	else:
	finetune_dataset = None

	with torch.cuda.amp.autocast(dtype=autocast_dtype):
	results_dict_logreg = eval_log_regression(
	model=model,
	train_dataset=train_dataset,
	val_dataset=val_dataset,
	finetune_dataset=finetune_dataset,
	metric_type=metric_type,
	batch_size=256,
	num_workers=0, # 5,
	finetune_on_val=finetune_on_val,
	train_dtype=train_dtype,
	train_features_device=train_features_device,
	max_train_iters=max_train_iters,
	)

	results_dict = {
	"top-1": results_dict_logreg["top-1"].cpu().numpy() * 100.0,
	"top-5": results_dict_logreg.get("top-5", torch.tensor(0.0)).cpu().numpy() * 100.0,
	"best_C": results_dict_logreg["best_C"],
	}
	logger.info(
	"\n".join(
	[
	"Training of the supervised logistic regression on frozen features completed.\n"
	"Top-1 test accuracy: {acc:.1f}".format(acc=results_dict["top-1"]),
	"Top-5 test accuracy: {acc:.1f}".format(acc=results_dict["top-5"]),
	"obtained for C = {c:.6f}".format(c=results_dict["best_C"]),
	]
	)
	)

	torch.distributed.barrier()
	return results_dict


	def main(args):
	model, autocast_dtype = setup_and_build_model(args)
	eval_log_regression_with_model(
	model=model,
	train_dataset_str=args.train_dataset_str,
	val_dataset_str=args.val_dataset_str,
	finetune_dataset_str=args.finetune_dataset_str,
	autocast_dtype=autocast_dtype,
	finetune_on_val=args.finetune_on_val,
	metric_type=args.metric_type,
	train_dtype=as_torch_dtype(args.train_dtype),
	train_features_device=torch.device(args.train_features_device),
	max_train_iters=args.max_train_iters,
	)
	return 0


	if __name__ == "__main__":
	description = "DINOv2 logistic regression evaluation"
	args_parser = get_args_parser(description=description)
	args = args_parser.parse_args()
	sys.exit(main(args))