| import glob |
| import json |
| import logging |
| import os |
| import sys |
| from pathlib import Path |
|
|
| logger = logging.getLogger(__name__) |
|
|
| FILE = Path(__file__).resolve() |
| ROOT = FILE.parents[3] |
| if str(ROOT) not in sys.path: |
| sys.path.append(str(ROOT)) |
|
|
| try: |
| import comet_ml |
|
|
| |
| config = comet_ml.config.get_config() |
| COMET_PROJECT_NAME = config.get_string(os.getenv('COMET_PROJECT_NAME'), 'comet.project_name', default='yolov5') |
| except (ModuleNotFoundError, ImportError): |
| comet_ml = None |
| COMET_PROJECT_NAME = None |
|
|
| import PIL |
| import torch |
| import torchvision.transforms as T |
| import yaml |
|
|
| from utils.dataloaders import img2label_paths |
| from utils.general import check_dataset, scale_boxes, xywh2xyxy |
| from utils.metrics import box_iou |
|
|
| COMET_PREFIX = 'comet://' |
|
|
| COMET_MODE = os.getenv('COMET_MODE', 'online') |
|
|
| |
| COMET_MODEL_NAME = os.getenv('COMET_MODEL_NAME', 'yolov5') |
|
|
| |
| COMET_UPLOAD_DATASET = os.getenv('COMET_UPLOAD_DATASET', 'false').lower() == 'true' |
|
|
| |
| COMET_LOG_CONFUSION_MATRIX = os.getenv('COMET_LOG_CONFUSION_MATRIX', 'true').lower() == 'true' |
| COMET_LOG_PREDICTIONS = os.getenv('COMET_LOG_PREDICTIONS', 'true').lower() == 'true' |
| COMET_MAX_IMAGE_UPLOADS = int(os.getenv('COMET_MAX_IMAGE_UPLOADS', 100)) |
|
|
| |
| CONF_THRES = float(os.getenv('CONF_THRES', 0.001)) |
| IOU_THRES = float(os.getenv('IOU_THRES', 0.6)) |
|
|
| |
| COMET_LOG_BATCH_METRICS = os.getenv('COMET_LOG_BATCH_METRICS', 'false').lower() == 'true' |
| COMET_BATCH_LOGGING_INTERVAL = os.getenv('COMET_BATCH_LOGGING_INTERVAL', 1) |
| COMET_PREDICTION_LOGGING_INTERVAL = os.getenv('COMET_PREDICTION_LOGGING_INTERVAL', 1) |
| COMET_LOG_PER_CLASS_METRICS = os.getenv('COMET_LOG_PER_CLASS_METRICS', 'false').lower() == 'true' |
|
|
| RANK = int(os.getenv('RANK', -1)) |
|
|
| to_pil = T.ToPILImage() |
|
|
|
|
| class CometLogger: |
| """Log metrics, parameters, source code, models and much more |
| with Comet |
| """ |
|
|
| def __init__(self, opt, hyp, run_id=None, job_type='Training', **experiment_kwargs) -> None: |
| self.job_type = job_type |
| self.opt = opt |
| self.hyp = hyp |
|
|
| |
| self.comet_mode = COMET_MODE |
|
|
| self.save_model = opt.save_period > -1 |
| self.model_name = COMET_MODEL_NAME |
|
|
| |
| self.log_batch_metrics = COMET_LOG_BATCH_METRICS |
| self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL |
|
|
| |
| self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET |
| self.resume = self.opt.resume |
|
|
| |
| self.default_experiment_kwargs = { |
| 'log_code': False, |
| 'log_env_gpu': True, |
| 'log_env_cpu': True, |
| 'project_name': COMET_PROJECT_NAME,} |
| self.default_experiment_kwargs.update(experiment_kwargs) |
| self.experiment = self._get_experiment(self.comet_mode, run_id) |
|
|
| self.data_dict = self.check_dataset(self.opt.data) |
| self.class_names = self.data_dict['names'] |
| self.num_classes = self.data_dict['nc'] |
|
|
| self.logged_images_count = 0 |
| self.max_images = COMET_MAX_IMAGE_UPLOADS |
|
|
| if run_id is None: |
| self.experiment.log_other('Created from', 'YOLOv5') |
| if not isinstance(self.experiment, comet_ml.OfflineExperiment): |
| workspace, project_name, experiment_id = self.experiment.url.split('/')[-3:] |
| self.experiment.log_other( |
| 'Run Path', |
| f'{workspace}/{project_name}/{experiment_id}', |
| ) |
| self.log_parameters(vars(opt)) |
| self.log_parameters(self.opt.hyp) |
| self.log_asset_data( |
| self.opt.hyp, |
| name='hyperparameters.json', |
| metadata={'type': 'hyp-config-file'}, |
| ) |
| self.log_asset( |
| f'{self.opt.save_dir}/opt.yaml', |
| metadata={'type': 'opt-config-file'}, |
| ) |
|
|
| self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX |
|
|
| if hasattr(self.opt, 'conf_thres'): |
| self.conf_thres = self.opt.conf_thres |
| else: |
| self.conf_thres = CONF_THRES |
| if hasattr(self.opt, 'iou_thres'): |
| self.iou_thres = self.opt.iou_thres |
| else: |
| self.iou_thres = IOU_THRES |
|
|
| self.log_parameters({'val_iou_threshold': self.iou_thres, 'val_conf_threshold': self.conf_thres}) |
|
|
| self.comet_log_predictions = COMET_LOG_PREDICTIONS |
| if self.opt.bbox_interval == -1: |
| self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10 |
| else: |
| self.comet_log_prediction_interval = self.opt.bbox_interval |
|
|
| if self.comet_log_predictions: |
| self.metadata_dict = {} |
| self.logged_image_names = [] |
|
|
| self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS |
|
|
| self.experiment.log_others({ |
| 'comet_mode': COMET_MODE, |
| 'comet_max_image_uploads': COMET_MAX_IMAGE_UPLOADS, |
| 'comet_log_per_class_metrics': COMET_LOG_PER_CLASS_METRICS, |
| 'comet_log_batch_metrics': COMET_LOG_BATCH_METRICS, |
| 'comet_log_confusion_matrix': COMET_LOG_CONFUSION_MATRIX, |
| 'comet_model_name': COMET_MODEL_NAME,}) |
|
|
| |
| if hasattr(self.opt, 'comet_optimizer_id'): |
| self.experiment.log_other('optimizer_id', self.opt.comet_optimizer_id) |
| self.experiment.log_other('optimizer_objective', self.opt.comet_optimizer_objective) |
| self.experiment.log_other('optimizer_metric', self.opt.comet_optimizer_metric) |
| self.experiment.log_other('optimizer_parameters', json.dumps(self.hyp)) |
|
|
| def _get_experiment(self, mode, experiment_id=None): |
| if mode == 'offline': |
| if experiment_id is not None: |
| return comet_ml.ExistingOfflineExperiment( |
| previous_experiment=experiment_id, |
| **self.default_experiment_kwargs, |
| ) |
|
|
| return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,) |
|
|
| else: |
| try: |
| if experiment_id is not None: |
| return comet_ml.ExistingExperiment( |
| previous_experiment=experiment_id, |
| **self.default_experiment_kwargs, |
| ) |
|
|
| return comet_ml.Experiment(**self.default_experiment_kwargs) |
|
|
| except ValueError: |
| logger.warning('COMET WARNING: ' |
| 'Comet credentials have not been set. ' |
| 'Comet will default to offline logging. ' |
| 'Please set your credentials to enable online logging.') |
| return self._get_experiment('offline', experiment_id) |
|
|
| return |
|
|
| def log_metrics(self, log_dict, **kwargs): |
| self.experiment.log_metrics(log_dict, **kwargs) |
|
|
| def log_parameters(self, log_dict, **kwargs): |
| self.experiment.log_parameters(log_dict, **kwargs) |
|
|
| def log_asset(self, asset_path, **kwargs): |
| self.experiment.log_asset(asset_path, **kwargs) |
|
|
| def log_asset_data(self, asset, **kwargs): |
| self.experiment.log_asset_data(asset, **kwargs) |
|
|
| def log_image(self, img, **kwargs): |
| self.experiment.log_image(img, **kwargs) |
|
|
| def log_model(self, path, opt, epoch, fitness_score, best_model=False): |
| if not self.save_model: |
| return |
|
|
| model_metadata = { |
| 'fitness_score': fitness_score[-1], |
| 'epochs_trained': epoch + 1, |
| 'save_period': opt.save_period, |
| 'total_epochs': opt.epochs,} |
|
|
| model_files = glob.glob(f'{path}/*.pt') |
| for model_path in model_files: |
| name = Path(model_path).name |
|
|
| self.experiment.log_model( |
| self.model_name, |
| file_or_folder=model_path, |
| file_name=name, |
| metadata=model_metadata, |
| overwrite=True, |
| ) |
|
|
| def check_dataset(self, data_file): |
| with open(data_file) as f: |
| data_config = yaml.safe_load(f) |
|
|
| if data_config['path'].startswith(COMET_PREFIX): |
| path = data_config['path'].replace(COMET_PREFIX, '') |
| data_dict = self.download_dataset_artifact(path) |
|
|
| return data_dict |
|
|
| self.log_asset(self.opt.data, metadata={'type': 'data-config-file'}) |
|
|
| return check_dataset(data_file) |
|
|
| def log_predictions(self, image, labelsn, path, shape, predn): |
| if self.logged_images_count >= self.max_images: |
| return |
| detections = predn[predn[:, 4] > self.conf_thres] |
| iou = box_iou(labelsn[:, 1:], detections[:, :4]) |
| mask, _ = torch.where(iou > self.iou_thres) |
| if len(mask) == 0: |
| return |
|
|
| filtered_detections = detections[mask] |
| filtered_labels = labelsn[mask] |
|
|
| image_id = path.split('/')[-1].split('.')[0] |
| image_name = f'{image_id}_curr_epoch_{self.experiment.curr_epoch}' |
| if image_name not in self.logged_image_names: |
| native_scale_image = PIL.Image.open(path) |
| self.log_image(native_scale_image, name=image_name) |
| self.logged_image_names.append(image_name) |
|
|
| metadata = [] |
| for cls, *xyxy in filtered_labels.tolist(): |
| metadata.append({ |
| 'label': f'{self.class_names[int(cls)]}-gt', |
| 'score': 100, |
| 'box': { |
| 'x': xyxy[0], |
| 'y': xyxy[1], |
| 'x2': xyxy[2], |
| 'y2': xyxy[3]},}) |
| for *xyxy, conf, cls in filtered_detections.tolist(): |
| metadata.append({ |
| 'label': f'{self.class_names[int(cls)]}', |
| 'score': conf * 100, |
| 'box': { |
| 'x': xyxy[0], |
| 'y': xyxy[1], |
| 'x2': xyxy[2], |
| 'y2': xyxy[3]},}) |
|
|
| self.metadata_dict[image_name] = metadata |
| self.logged_images_count += 1 |
|
|
| return |
|
|
| def preprocess_prediction(self, image, labels, shape, pred): |
| nl, _ = labels.shape[0], pred.shape[0] |
|
|
| |
| if self.opt.single_cls: |
| pred[:, 5] = 0 |
|
|
| predn = pred.clone() |
| scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) |
|
|
| labelsn = None |
| if nl: |
| tbox = xywh2xyxy(labels[:, 1:5]) |
| scale_boxes(image.shape[1:], tbox, shape[0], shape[1]) |
| labelsn = torch.cat((labels[:, 0:1], tbox), 1) |
| scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) |
|
|
| return predn, labelsn |
|
|
| def add_assets_to_artifact(self, artifact, path, asset_path, split): |
| img_paths = sorted(glob.glob(f'{asset_path}/*')) |
| label_paths = img2label_paths(img_paths) |
|
|
| for image_file, label_file in zip(img_paths, label_paths): |
| image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file]) |
|
|
| try: |
| artifact.add(image_file, logical_path=image_logical_path, metadata={'split': split}) |
| artifact.add(label_file, logical_path=label_logical_path, metadata={'split': split}) |
| except ValueError as e: |
| logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.') |
| logger.error(f'COMET ERROR: {e}') |
| continue |
|
|
| return artifact |
|
|
| def upload_dataset_artifact(self): |
| dataset_name = self.data_dict.get('dataset_name', 'yolov5-dataset') |
| path = str((ROOT / Path(self.data_dict['path'])).resolve()) |
|
|
| metadata = self.data_dict.copy() |
| for key in ['train', 'val', 'test']: |
| split_path = metadata.get(key) |
| if split_path is not None: |
| metadata[key] = split_path.replace(path, '') |
|
|
| artifact = comet_ml.Artifact(name=dataset_name, artifact_type='dataset', metadata=metadata) |
| for key in metadata.keys(): |
| if key in ['train', 'val', 'test']: |
| if isinstance(self.upload_dataset, str) and (key != self.upload_dataset): |
| continue |
|
|
| asset_path = self.data_dict.get(key) |
| if asset_path is not None: |
| artifact = self.add_assets_to_artifact(artifact, path, asset_path, key) |
|
|
| self.experiment.log_artifact(artifact) |
|
|
| return |
|
|
| def download_dataset_artifact(self, artifact_path): |
| logged_artifact = self.experiment.get_artifact(artifact_path) |
| artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name) |
| logged_artifact.download(artifact_save_dir) |
|
|
| metadata = logged_artifact.metadata |
| data_dict = metadata.copy() |
| data_dict['path'] = artifact_save_dir |
|
|
| metadata_names = metadata.get('names') |
| if type(metadata_names) == dict: |
| data_dict['names'] = {int(k): v for k, v in metadata.get('names').items()} |
| elif type(metadata_names) == list: |
| data_dict['names'] = {int(k): v for k, v in zip(range(len(metadata_names)), metadata_names)} |
| else: |
| raise "Invalid 'names' field in dataset yaml file. Please use a list or dictionary" |
|
|
| data_dict = self.update_data_paths(data_dict) |
| return data_dict |
|
|
| def update_data_paths(self, data_dict): |
| path = data_dict.get('path', '') |
|
|
| for split in ['train', 'val', 'test']: |
| if data_dict.get(split): |
| split_path = data_dict.get(split) |
| data_dict[split] = (f'{path}/{split_path}' if isinstance(split, str) else [ |
| f'{path}/{x}' for x in split_path]) |
|
|
| return data_dict |
|
|
| def on_pretrain_routine_end(self, paths): |
| if self.opt.resume: |
| return |
|
|
| for path in paths: |
| self.log_asset(str(path)) |
|
|
| if self.upload_dataset: |
| if not self.resume: |
| self.upload_dataset_artifact() |
|
|
| return |
|
|
| def on_train_start(self): |
| self.log_parameters(self.hyp) |
|
|
| def on_train_epoch_start(self): |
| return |
|
|
| def on_train_epoch_end(self, epoch): |
| self.experiment.curr_epoch = epoch |
|
|
| return |
|
|
| def on_train_batch_start(self): |
| return |
|
|
| def on_train_batch_end(self, log_dict, step): |
| self.experiment.curr_step = step |
| if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0): |
| self.log_metrics(log_dict, step=step) |
|
|
| return |
|
|
| def on_train_end(self, files, save_dir, last, best, epoch, results): |
| if self.comet_log_predictions: |
| curr_epoch = self.experiment.curr_epoch |
| self.experiment.log_asset_data(self.metadata_dict, 'image-metadata.json', epoch=curr_epoch) |
|
|
| for f in files: |
| self.log_asset(f, metadata={'epoch': epoch}) |
| self.log_asset(f'{save_dir}/results.csv', metadata={'epoch': epoch}) |
|
|
| if not self.opt.evolve: |
| model_path = str(best if best.exists() else last) |
| name = Path(model_path).name |
| if self.save_model: |
| self.experiment.log_model( |
| self.model_name, |
| file_or_folder=model_path, |
| file_name=name, |
| overwrite=True, |
| ) |
|
|
| |
| if hasattr(self.opt, 'comet_optimizer_id'): |
| metric = results.get(self.opt.comet_optimizer_metric) |
| self.experiment.log_other('optimizer_metric_value', metric) |
|
|
| self.finish_run() |
|
|
| def on_val_start(self): |
| return |
|
|
| def on_val_batch_start(self): |
| return |
|
|
| def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs): |
| if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)): |
| return |
|
|
| for si, pred in enumerate(outputs): |
| if len(pred) == 0: |
| continue |
|
|
| image = images[si] |
| labels = targets[targets[:, 0] == si, 1:] |
| shape = shapes[si] |
| path = paths[si] |
| predn, labelsn = self.preprocess_prediction(image, labels, shape, pred) |
| if labelsn is not None: |
| self.log_predictions(image, labelsn, path, shape, predn) |
|
|
| return |
|
|
| def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix): |
| if self.comet_log_per_class_metrics: |
| if self.num_classes > 1: |
| for i, c in enumerate(ap_class): |
| class_name = self.class_names[c] |
| self.experiment.log_metrics( |
| { |
| 'mAP@.5': ap50[i], |
| 'mAP@.5:.95': ap[i], |
| 'precision': p[i], |
| 'recall': r[i], |
| 'f1': f1[i], |
| 'true_positives': tp[i], |
| 'false_positives': fp[i], |
| 'support': nt[c]}, |
| prefix=class_name) |
|
|
| if self.comet_log_confusion_matrix: |
| epoch = self.experiment.curr_epoch |
| class_names = list(self.class_names.values()) |
| class_names.append('background') |
| num_classes = len(class_names) |
|
|
| self.experiment.log_confusion_matrix( |
| matrix=confusion_matrix.matrix, |
| max_categories=num_classes, |
| labels=class_names, |
| epoch=epoch, |
| column_label='Actual Category', |
| row_label='Predicted Category', |
| file_name=f'confusion-matrix-epoch-{epoch}.json', |
| ) |
|
|
| def on_fit_epoch_end(self, result, epoch): |
| self.log_metrics(result, epoch=epoch) |
|
|
| def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): |
| if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: |
| self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) |
|
|
| def on_params_update(self, params): |
| self.log_parameters(params) |
|
|
| def finish_run(self): |
| self.experiment.end() |
|
|