"""Module containing commands line scripts for training and planning steps.""" import os import warnings from pathlib import Path import click import yaml from synplan.chem.data.filtering import ReactionFilterConfig, filter_reactions_from_file from synplan.chem.data.standardizing import ( ReactionStandardizationConfig, standardize_reactions_from_file, ) from synplan.chem.reaction_rules.extraction import extract_rules_from_reactions from synplan.chem.reaction_routes.clustering import run_cluster_cli from synplan.chem.utils import standardize_building_blocks from synplan.mcts.search import run_search from synplan.ml.training.supervised import create_policy_dataset, run_policy_training from synplan.ml.training.reinforcement import run_updating from synplan.utils.config import ( PolicyNetworkConfig, RuleExtractionConfig, TreeConfig, TuningConfig, ValueNetworkConfig, ) from synplan.utils.loading import download_all_data from synplan.utils.visualisation import ( routes_clustering_report, routes_subclustering_report, ) warnings.filterwarnings("ignore") @click.group(name="synplan") def synplan(): """SynPlanner command line interface.""" @synplan.command(name="download_all_data") @click.option( "--save_to", "save_to", help="Path to the folder where downloaded data will be stored.", ) def download_all_data_cli(save_to: str = ".") -> None: """Downloads all data for training, planning and benchmarking SynPlanner.""" download_all_data(save_to=save_to) @synplan.command(name="building_blocks_standardizing") @click.option( "--input", "input_file", required=True, type=click.Path(exists=True), help="Path to the file with building blocks to be standardized.", ) @click.option( "--output", "output_file", required=True, type=click.Path(), help="Path to the file where standardized building blocks will be stored.", ) def building_blocks_standardizing_cli(input_file: str, output_file: str) -> None: """Standardizes building blocks.""" standardize_building_blocks(input_file=input_file, output_file=output_file) @synplan.command(name="reaction_standardizing") @click.option( "--config", "config_path", required=True, type=click.Path(exists=True), help="Path to the configuration file for reactions standardizing.", ) @click.option( "--input", "input_file", required=True, type=click.Path(exists=True), help="Path to the file with reactions to be standardized.", ) @click.option( "--output", "output_file", type=click.Path(), help="Path to the file where standardized reactions will be stored.", ) @click.option( "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing." ) def reaction_standardizing_cli( config_path: str, input_file: str, output_file: str, num_cpus: int ) -> None: """Standardizes reactions and remove duplicates.""" stand_config = ReactionStandardizationConfig.from_yaml(config_path) standardize_reactions_from_file( config=stand_config, input_reaction_data_path=input_file, standardized_reaction_data_path=output_file, num_cpus=num_cpus, batch_size=100, ) @synplan.command(name="reaction_filtering") @click.option( "--config", "config_path", required=True, type=click.Path(exists=True), help="Path to the configuration file for reactions filtering.", ) @click.option( "--input", "input_file", required=True, type=click.Path(exists=True), help="Path to the file with reactions to be filtered.", ) @click.option( "--output", "output_file", default=Path("./"), type=click.Path(), help="Path to the file where successfully filtered reactions will be stored.", ) @click.option( "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing." ) def reaction_filtering_cli( config_path: str, input_file: str, output_file: str, num_cpus: int ): """Filters erroneous reactions.""" reaction_check_config = ReactionFilterConfig().from_yaml(config_path) filter_reactions_from_file( config=reaction_check_config, input_reaction_data_path=input_file, filtered_reaction_data_path=output_file, num_cpus=num_cpus, batch_size=100, ) @synplan.command(name="rule_extracting") @click.option( "--config", "config_path", required=True, type=click.Path(exists=True), help="Path to the configuration file for reaction rules extracting.", ) @click.option( "--input", "input_file", required=True, type=click.Path(exists=True), help="Path to the file with reactions for reaction rules extraction.", ) @click.option( "--output", "output_file", required=True, type=click.Path(), help="Path to the file where extracted reaction rules will be stored.", ) @click.option( "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing." ) def rule_extracting_cli( config_path: str, input_file: str, output_file: str, num_cpus: int ): """Reaction rules extraction.""" reaction_rule_config = RuleExtractionConfig.from_yaml(config_path) extract_rules_from_reactions( config=reaction_rule_config, reaction_data_path=input_file, reaction_rules_path=output_file, num_cpus=num_cpus, batch_size=100, ) @synplan.command(name="ranking_policy_training") @click.option( "--config", "config_path", required=True, type=click.Path(exists=True), help="Path to the configuration file for ranking policy training.", ) @click.option( "--reaction_data", required=True, type=click.Path(exists=True), help="Path to the file with reactions for ranking policy training.", ) @click.option( "--reaction_rules", required=True, type=click.Path(exists=True), help="Path to the file with extracted reaction rules.", ) @click.option( "--results_dir", default=Path("."), type=click.Path(), help="Path to the directory where the trained policy network will be stored.", ) @click.option( "--num_cpus", default=4, type=int, help="The number of CPUs to use for training set preparation.", ) def ranking_policy_training_cli( config_path: str, reaction_data: str, reaction_rules: str, results_dir: str, num_cpus: int, ) -> None: """Ranking policy network training.""" policy_config = PolicyNetworkConfig.from_yaml(config_path) policy_config.policy_type = "ranking" policy_dataset_file = os.path.join(results_dir, "policy_dataset.dt") datamodule = create_policy_dataset( reaction_rules_path=reaction_rules, molecules_or_reactions_path=reaction_data, output_path=policy_dataset_file, dataset_type="ranking", batch_size=policy_config.batch_size, num_cpus=num_cpus, ) run_policy_training(datamodule, config=policy_config, results_path=results_dir) @synplan.command(name="filtering_policy_training") @click.option( "--config", "config_path", required=True, type=click.Path(exists=True), help="Path to the configuration file for filtering policy training.", ) @click.option( "--molecule_data", required=True, type=click.Path(exists=True), help="Path to the file with molecules for filtering policy training.", ) @click.option( "--reaction_rules", required=True, type=click.Path(exists=True), help="Path to the file with extracted reaction rules.", ) @click.option( "--results_dir", default=Path("."), type=click.Path(), help="Path to the directory where the trained policy network will be stored.", ) @click.option( "--num_cpus", default=8, type=int, help="The number of CPUs to use for training set preparation.", ) def filtering_policy_training_cli( config_path: str, molecule_data: str, reaction_rules: str, results_dir: str, num_cpus: int, ): """Filtering policy network training.""" policy_config = PolicyNetworkConfig.from_yaml(config_path) policy_config.policy_type = "filtering" policy_dataset_file = os.path.join(results_dir, "policy_dataset.ckpt") datamodule = create_policy_dataset( reaction_rules_path=reaction_rules, molecules_or_reactions_path=molecule_data, output_path=policy_dataset_file, dataset_type="filtering", batch_size=policy_config.batch_size, num_cpus=num_cpus, ) run_policy_training(datamodule, config=policy_config, results_path=results_dir) @synplan.command(name="value_network_tuning") @click.option( "--config", "config_path", required=True, type=click.Path(exists=True), help="Path to the configuration file for value network training.", ) @click.option( "--targets", required=True, type=click.Path(exists=True), help="Path to the file with target molecules for planning simulations.", ) @click.option( "--reaction_rules", required=True, type=click.Path(exists=True), help="Path to the file with extracted reaction rules. Needed for planning simulations.", ) @click.option( "--building_blocks", required=True, type=click.Path(exists=True), help="Path to the file with building blocks. Needed for planning simulations.", ) @click.option( "--policy_network", required=True, type=click.Path(exists=True), help="Path to the file with trained policy network. Needed for planning simulations.", ) @click.option( "--value_network", default=None, type=click.Path(exists=True), help="Path to the file with trained value network. Needed in case of additional value network fine-tuning", ) @click.option( "--results_dir", default=".", type=click.Path(exists=False), help="Path to the directory where the trained value network will be stored.", ) def value_network_tuning_cli( config_path: str, targets: str, reaction_rules: str, building_blocks: str, policy_network: str, value_network: str, results_dir: str, ): """Value network tuning.""" with open(config_path, "r", encoding="utf-8") as file: config = yaml.safe_load(file) policy_config = PolicyNetworkConfig.from_dict(config["node_expansion"]) policy_config.weights_path = policy_network value_config = ValueNetworkConfig.from_dict(config["value_network"]) if value_network is None: value_config.weights_path = os.path.join( results_dir, "weights", "value_network.ckpt" ) tree_config = TreeConfig.from_dict(config["tree"]) tuning_config = TuningConfig.from_dict(config["tuning"]) run_updating( targets_path=targets, tree_config=tree_config, policy_config=policy_config, value_config=value_config, reinforce_config=tuning_config, reaction_rules_path=reaction_rules, building_blocks_path=building_blocks, results_root=results_dir, ) @synplan.command(name="planning") @click.option( "--config", "config_path", required=True, type=click.Path(exists=True), help="Path to the configuration file for retrosynthetic planning.", ) @click.option( "--targets", required=True, type=click.Path(exists=True), help="Path to the file with target molecules for retrosynthetic planning.", ) @click.option( "--reaction_rules", required=True, type=click.Path(exists=True), help="Path to the file with extracted reaction rules.", ) @click.option( "--building_blocks", required=True, type=click.Path(exists=True), help="Path to the file with building blocks.", ) @click.option( "--policy_network", required=True, type=click.Path(exists=True), help="Path to the file with trained policy network.", ) @click.option( "--value_network", default=None, type=click.Path(exists=True), help="Path to the file with trained value network.", ) @click.option( "--results_dir", default=".", type=click.Path(exists=False), help="Path to the file where retrosynthetic planning results will be stored.", ) def planning_cli( config_path: str, targets: str, reaction_rules: str, building_blocks: str, policy_network: str, value_network: str, results_dir: str, ): """Retrosynthetic planning.""" with open(config_path, "r", encoding="utf-8") as file: config = yaml.safe_load(file) search_config = {**config["tree"], **config["node_evaluation"]} policy_config = PolicyNetworkConfig.from_dict( {**config["node_expansion"], **{"weights_path": policy_network}} ) run_search( targets_path=targets, search_config=search_config, policy_config=policy_config, reaction_rules_path=reaction_rules, building_blocks_path=building_blocks, value_network_path=value_network, results_root=results_dir, ) @synplan.command(name="clustering") @click.option( "--targets", required=True, type=click.Path(exists=True), help="Path to the file with target molecules for retrosynthetic planning.", ) @click.option( "--routes_file", default=".", type=click.Path(exists=False), help="Path to the file where the planning results are stored.", ) @click.option( "--cluster_results_dir", default=".", type=click.Path(exists=False), help="Path to the file where clustering results will be stored.", ) @click.option( "--perform_subcluster", default=None, type=click.Path(exists=False), help="Perform subclustering.", ) @click.option( "--subcluster_results_dir", default=".", type=click.Path(exists=False), help="Path to the file where subclustering results will be stored.", ) def cluster_route_from_file_cli( targets: str, routes_file: str, cluster_results_dir: str, perform_subcluster: bool, subcluster_results_dir: str, ): """Clustering the routes from planning""" run_cluster_cli( routes_file=routes_file, cluster_results_dir=cluster_results_dir, perform_subcluster=perform_subcluster, subcluster_results_dir=subcluster_results_dir if perform_subcluster else None, ) if __name__ == "__main__": synplan()