|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| import os
|
| import sys
|
| from hydra.core.hydra_config import HydraConfig
|
| import hydra
|
| import warnings
|
| from timm import utils as timm_utils
|
| from types import SimpleNamespace
|
|
|
| warnings.filterwarnings("ignore")
|
| os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
| os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
|
|
|
| import tensorflow as tf
|
| import torch
|
| from omegaconf import DictConfig
|
| import mlflow
|
| import argparse
|
| import logging
|
| from typing import Optional
|
| from clearml import Task
|
| from clearml.backend_config.defs import get_active_config_file
|
|
|
| SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| sys.path.append(os.path.dirname(SCRIPT_DIR))
|
| from api import get_model, get_dataloaders, get_trainer, get_quantizer, get_evaluator, get_predictor
|
| from common.utils import mlflow_ini, set_gpu_memory_limit, get_random_seed, display_figures, log_to_file
|
| from common.benchmarking import benchmark, cloud_connect
|
| from common.evaluation import gen_load_val
|
| from common.prediction import gen_load_val_predict
|
| from image_classification.tf.src.utils import get_config
|
| from image_classification.tf.src.deployment import deploy, deploy_mpu
|
| from common.onnx_utils.onnx_model_convertor import torch_model_export_static
|
|
|
|
|
| def _process_mode(cfg: DictConfig = None) -> None:
|
| """
|
| Process the selected mode of operation.
|
|
|
| Args:
|
| cfg (DictConfig): The configuration object.
|
|
|
| Returns:
|
| None
|
| Raises:
|
| ValueError: If an invalid operation_mode is selected or if required datasets are missing.
|
| """
|
|
|
|
|
| mode = cfg.operation_mode
|
| mlflow.log_param("model_path", cfg.model.model_path)
|
| log_to_file(cfg.output_dir, f'operation_mode: {mode}')
|
|
|
|
|
| credentials = None
|
| if cfg.tools and cfg.tools.stedgeai and cfg.tools.stedgeai.on_cloud:
|
| _, _, credentials = cloud_connect(stedgeai_core_version=cfg.tools.stedgeai.version)
|
|
|
|
|
| model = get_model(cfg=cfg)
|
| saved_model_dir = os.path.join(cfg.output_dir, cfg.general.saved_models_dir)
|
| os.makedirs(saved_model_dir, exist_ok=True)
|
| if cfg.model.framework == 'torch' and isinstance(model, torch.nn.Module) and cfg.operation_mode not in ['training', 'chain_tb', 'chain_tqe', 'chain_tqeb', 'chain_tbqeb']:
|
|
|
|
|
| model = torch_model_export_static(cfg=cfg,
|
| model_dir=saved_model_dir,
|
| model=model)
|
|
|
|
|
| dataloaders = get_dataloaders(cfg=cfg)
|
|
|
|
|
| if mode == 'training':
|
| trainer = get_trainer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| trained_model = trainer.train()
|
| display_figures(cfg)
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=trained_model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| elif mode == 'evaluation':
|
| gen_load_val(cfg=cfg, model=model)
|
| os.chdir(os.path.dirname(os.path.realpath(__file__)))
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| elif mode == 'deployment':
|
| if cfg.hardware_type == "MPU":
|
| deploy_mpu(cfg=cfg, model_path_to_deploy=model.model_path)
|
| else:
|
| deploy(cfg=cfg, model_path_to_deploy=model.model_path)
|
| elif mode == 'quantization':
|
| quantizer = get_quantizer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| quantized_model = quantizer.quantize()
|
| elif mode == 'prediction':
|
| gen_load_val_predict(cfg=cfg, model=model)
|
| os.chdir(os.path.dirname(os.path.realpath(__file__)))
|
| predictor = get_predictor(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| predictor.predict()
|
| elif mode == 'benchmarking':
|
| benchmark(cfg=cfg, model_path_to_benchmark=model.model_path)
|
| elif mode == 'chain_tqe':
|
| trainer = get_trainer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| trained_model = trainer.train()
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=trained_model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| quantizer = get_quantizer(cfg=cfg,
|
| model=trained_model,
|
| dataloaders=dataloaders)
|
| quantized_model = quantizer.quantize()
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=quantized_model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| print('[INFO] : chain_tqe complete.')
|
| elif mode == 'chain_tqeb':
|
| trainer = get_trainer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| trained_model = trainer.train()
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=trained_model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| quantizer = get_quantizer(cfg=cfg,
|
| model=trained_model,
|
| dataloaders=dataloaders)
|
| quantized_model = quantizer.quantize()
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=quantized_model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| benchmark(cfg=cfg, model_path_to_benchmark=quantized_model.model_path, credentials=credentials)
|
| print('[INFO] : chain_tqeb complete.')
|
| elif mode == 'chain_eqe':
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| quantizer = get_quantizer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| quantized_model = quantizer.quantize()
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=quantized_model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| print('[INFO] : chain_eqe complete.')
|
| elif mode == 'chain_eqeb':
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| quantizer = get_quantizer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| quantized_model = quantizer.quantize()
|
| evaluator = get_evaluator(cfg=cfg,
|
| model=quantized_model,
|
| dataloaders=dataloaders)
|
| acc = evaluator.evaluate()
|
| benchmark(cfg=cfg, model_path_to_benchmark=quantized_model.model_path, credentials=credentials)
|
| print('[INFO] : chain_eqeb complete.')
|
| elif mode == 'chain_qb':
|
| quantizer = get_quantizer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| quantized_model = quantizer.quantize()
|
| benchmark(cfg=cfg, model_path_to_benchmark=quantized_model.model_path, credentials=credentials)
|
| print('[INFO] : chain_qb complete.')
|
| elif mode == 'chain_qd':
|
| quantizer = get_quantizer(cfg=cfg,
|
| model=model,
|
| dataloaders=dataloaders)
|
| quantized_model = quantizer.quantize()
|
| if cfg.hardware_type == "MCU":
|
| deploy(cfg=cfg, model_path_to_deploy=quantized_model.model_path, credentials=credentials)
|
| else:
|
| deploy_mpu(cfg=cfg, model_path_to_deploy=quantized_model.model_path, credentials=credentials)
|
| print('[INFO] : chain_qd complete.')
|
|
|
| else:
|
| raise ValueError(f"Invalid mode: {mode}")
|
|
|
|
|
| mlflow.log_artifact(cfg.output_dir)
|
| if mode in ['benchmarking', 'chain_qb', 'chain_eqeb', 'chain_tqeb']:
|
| mlflow.log_param("stedgeai_core_version", cfg.tools.stedgeai.version)
|
| mlflow.log_param("target", cfg.benchmarking.board)
|
|
|
|
|
| log_to_file(cfg.output_dir, f'operation finished: {mode}')
|
|
|
|
|
|
|
| if get_active_config_file() is not None:
|
| print(f"[INFO] : ClearML task connection")
|
| task = Task.current_task()
|
| task.connect(cfg)
|
|
|
| def _fw_agnostic_initializations(cfg: DictConfig = None) -> DictConfig:
|
| """
|
| Framework-agnostic initializations.
|
|
|
| This function performs initializations that are independent of the specific deep learning framework being used.
|
| It includes parsing the configuration file, setting up MLFlow, and initializing ClearML if a valid configuration
|
| file is found.
|
|
|
| Args:
|
| cfg (DictConfig): Configuration object.
|
|
|
| Returns:
|
| DictConfig: Updated configuration object with initialized settings.
|
| """
|
|
|
| cfg = get_config(cfg)
|
| cfg.output_dir = HydraConfig.get().run.dir
|
|
|
|
|
|
|
| mlflow_ini(cfg)
|
|
|
|
|
| print(f"[INFO] : ClearML config check")
|
| if get_active_config_file() is not None:
|
|
|
| print(f"[INFO] : ClearML initialization and configuration")
|
|
|
| task = Task.init(project_name=cfg.general.project_name,
|
| task_name='ic_modelzoo_task')
|
|
|
| task.connect_configuration(name=cfg.operation_mode,
|
| configuration=cfg)
|
|
|
|
|
| return cfg
|
|
|
|
|
| def _tf_specific_initializations(cfg: DictConfig = None) -> None:
|
| """
|
| TensorFlow-specific initializations.
|
|
|
| This function performs initializations specific to TensorFlow, such as configuring GPU memory limits
|
| and setting a random seed for reproducibility.
|
|
|
| Args:
|
| cfg (DictConfig): Configuration object.
|
| """
|
|
|
| if "general" in cfg and cfg.general:
|
|
|
| if "gpu_memory_limit" in cfg.general and cfg.general.gpu_memory_limit:
|
| set_gpu_memory_limit(cfg.general.gpu_memory_limit)
|
| print(f"[INFO] : Setting upper limit of usable GPU memory to {int(cfg.general.gpu_memory_limit)}GBytes.")
|
| else:
|
|
|
| print("[WARNING] The usable GPU memory is unlimited.\n"
|
| "Please consider setting the 'gpu_memory_limit' attribute "
|
| "in the 'general' section of your configuration file.")
|
|
|
|
|
| seed = get_random_seed(cfg)
|
| print(f'[INFO] : The random seed for this simulation is {seed}')
|
| if seed is not None:
|
| tf.keras.utils.set_random_seed(seed)
|
|
|
|
|
| def _torch_specific_initializations(cfg: DictConfig = None) -> None:
|
| """
|
| PyTorch-specific initializations.
|
|
|
| This function is a placeholder for PyTorch-specific initializations, such as configuring GPU memory limits
|
| and setting a random seed for reproducibility.
|
|
|
| Args:
|
| cfg (DictConfig): Configuration object.
|
| """
|
| device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| temp_args = SimpleNamespace(
|
| device=device,
|
| )
|
| device = timm_utils.init_distributed_device(temp_args)
|
|
|
| cfg.device = temp_args.device
|
| cfg.world_size = temp_args.world_size
|
| cfg.rank = temp_args.rank
|
| cfg.local_rank = temp_args.local_rank
|
| cfg.distributed = temp_args.distributed
|
|
|
|
|
| @hydra.main(version_base=None, config_path="", config_name="user_config")
|
| def main(cfg: DictConfig) -> None:
|
| """
|
| Main entry point of the script.
|
|
|
| Args:
|
| cfg: Configuration dictionary.
|
|
|
| Returns:
|
| None
|
| """
|
|
|
| cfg = _fw_agnostic_initializations(cfg)
|
|
|
|
|
| if cfg.model.framework == "tf":
|
| _tf_specific_initializations(cfg)
|
| elif cfg.model.framework == "torch":
|
| _torch_specific_initializations(cfg)
|
| else:
|
| raise ValueError(f"Invalid framework used: {cfg.model.framework}")
|
|
|
|
|
| _process_mode(cfg=cfg)
|
|
|
|
|
|
|
| if __name__ == "__main__":
|
| parser = argparse.ArgumentParser()
|
| parser.add_argument('--config-path', type=str, default='./', help='Path to folder containing configuration file')
|
| parser.add_argument('--config-name', type=str, default='user_config.yaml', help='name of the configuration file')
|
|
|
| parser.add_argument('params', nargs='*',
|
| help='List of parameters to over-ride in config.yaml')
|
| args = parser.parse_args()
|
|
|
|
|
| main()
|
|
|
|
|
| mlflow.log_param('config_path', args.config_path)
|
| mlflow.log_param('config_name', args.config_name)
|
| mlflow.end_run()
|
|
|