Gilmullin Almaz
Refactor code structure and remove redundant sections for improved readability and maintainability
914ea41
"""Module containing commands line scripts for training and planning steps."""
import os
import warnings
from pathlib import Path
import click
import yaml
from synplan.chem.data.filtering import ReactionFilterConfig, filter_reactions_from_file
from synplan.chem.data.standardizing import (
ReactionStandardizationConfig,
standardize_reactions_from_file,
)
from synplan.chem.reaction_rules.extraction import extract_rules_from_reactions
from synplan.chem.reaction_routes.clustering import run_cluster_cli
from synplan.chem.utils import standardize_building_blocks
from synplan.mcts.search import run_search
from synplan.ml.training.supervised import create_policy_dataset, run_policy_training
from synplan.ml.training.reinforcement import run_updating
from synplan.utils.config import (
PolicyNetworkConfig,
RuleExtractionConfig,
TreeConfig,
TuningConfig,
ValueNetworkConfig,
)
from synplan.utils.loading import download_all_data
from synplan.utils.visualisation import (
routes_clustering_report,
routes_subclustering_report,
)
warnings.filterwarnings("ignore")
@click.group(name="synplan")
def synplan():
"""SynPlanner command line interface."""
@synplan.command(name="download_all_data")
@click.option(
"--save_to",
"save_to",
help="Path to the folder where downloaded data will be stored.",
)
def download_all_data_cli(save_to: str = ".") -> None:
"""Downloads all data for training, planning and benchmarking SynPlanner."""
download_all_data(save_to=save_to)
@synplan.command(name="building_blocks_standardizing")
@click.option(
"--input",
"input_file",
required=True,
type=click.Path(exists=True),
help="Path to the file with building blocks to be standardized.",
)
@click.option(
"--output",
"output_file",
required=True,
type=click.Path(),
help="Path to the file where standardized building blocks will be stored.",
)
def building_blocks_standardizing_cli(input_file: str, output_file: str) -> None:
"""Standardizes building blocks."""
standardize_building_blocks(input_file=input_file, output_file=output_file)
@synplan.command(name="reaction_standardizing")
@click.option(
"--config",
"config_path",
required=True,
type=click.Path(exists=True),
help="Path to the configuration file for reactions standardizing.",
)
@click.option(
"--input",
"input_file",
required=True,
type=click.Path(exists=True),
help="Path to the file with reactions to be standardized.",
)
@click.option(
"--output",
"output_file",
type=click.Path(),
help="Path to the file where standardized reactions will be stored.",
)
@click.option(
"--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
)
def reaction_standardizing_cli(
config_path: str, input_file: str, output_file: str, num_cpus: int
) -> None:
"""Standardizes reactions and remove duplicates."""
stand_config = ReactionStandardizationConfig.from_yaml(config_path)
standardize_reactions_from_file(
config=stand_config,
input_reaction_data_path=input_file,
standardized_reaction_data_path=output_file,
num_cpus=num_cpus,
batch_size=100,
)
@synplan.command(name="reaction_filtering")
@click.option(
"--config",
"config_path",
required=True,
type=click.Path(exists=True),
help="Path to the configuration file for reactions filtering.",
)
@click.option(
"--input",
"input_file",
required=True,
type=click.Path(exists=True),
help="Path to the file with reactions to be filtered.",
)
@click.option(
"--output",
"output_file",
default=Path("./"),
type=click.Path(),
help="Path to the file where successfully filtered reactions will be stored.",
)
@click.option(
"--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
)
def reaction_filtering_cli(
config_path: str, input_file: str, output_file: str, num_cpus: int
):
"""Filters erroneous reactions."""
reaction_check_config = ReactionFilterConfig().from_yaml(config_path)
filter_reactions_from_file(
config=reaction_check_config,
input_reaction_data_path=input_file,
filtered_reaction_data_path=output_file,
num_cpus=num_cpus,
batch_size=100,
)
@synplan.command(name="rule_extracting")
@click.option(
"--config",
"config_path",
required=True,
type=click.Path(exists=True),
help="Path to the configuration file for reaction rules extracting.",
)
@click.option(
"--input",
"input_file",
required=True,
type=click.Path(exists=True),
help="Path to the file with reactions for reaction rules extraction.",
)
@click.option(
"--output",
"output_file",
required=True,
type=click.Path(),
help="Path to the file where extracted reaction rules will be stored.",
)
@click.option(
"--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
)
def rule_extracting_cli(
config_path: str, input_file: str, output_file: str, num_cpus: int
):
"""Reaction rules extraction."""
reaction_rule_config = RuleExtractionConfig.from_yaml(config_path)
extract_rules_from_reactions(
config=reaction_rule_config,
reaction_data_path=input_file,
reaction_rules_path=output_file,
num_cpus=num_cpus,
batch_size=100,
)
@synplan.command(name="ranking_policy_training")
@click.option(
"--config",
"config_path",
required=True,
type=click.Path(exists=True),
help="Path to the configuration file for ranking policy training.",
)
@click.option(
"--reaction_data",
required=True,
type=click.Path(exists=True),
help="Path to the file with reactions for ranking policy training.",
)
@click.option(
"--reaction_rules",
required=True,
type=click.Path(exists=True),
help="Path to the file with extracted reaction rules.",
)
@click.option(
"--results_dir",
default=Path("."),
type=click.Path(),
help="Path to the directory where the trained policy network will be stored.",
)
@click.option(
"--num_cpus",
default=4,
type=int,
help="The number of CPUs to use for training set preparation.",
)
def ranking_policy_training_cli(
config_path: str,
reaction_data: str,
reaction_rules: str,
results_dir: str,
num_cpus: int,
) -> None:
"""Ranking policy network training."""
policy_config = PolicyNetworkConfig.from_yaml(config_path)
policy_config.policy_type = "ranking"
policy_dataset_file = os.path.join(results_dir, "policy_dataset.dt")
datamodule = create_policy_dataset(
reaction_rules_path=reaction_rules,
molecules_or_reactions_path=reaction_data,
output_path=policy_dataset_file,
dataset_type="ranking",
batch_size=policy_config.batch_size,
num_cpus=num_cpus,
)
run_policy_training(datamodule, config=policy_config, results_path=results_dir)
@synplan.command(name="filtering_policy_training")
@click.option(
"--config",
"config_path",
required=True,
type=click.Path(exists=True),
help="Path to the configuration file for filtering policy training.",
)
@click.option(
"--molecule_data",
required=True,
type=click.Path(exists=True),
help="Path to the file with molecules for filtering policy training.",
)
@click.option(
"--reaction_rules",
required=True,
type=click.Path(exists=True),
help="Path to the file with extracted reaction rules.",
)
@click.option(
"--results_dir",
default=Path("."),
type=click.Path(),
help="Path to the directory where the trained policy network will be stored.",
)
@click.option(
"--num_cpus",
default=8,
type=int,
help="The number of CPUs to use for training set preparation.",
)
def filtering_policy_training_cli(
config_path: str,
molecule_data: str,
reaction_rules: str,
results_dir: str,
num_cpus: int,
):
"""Filtering policy network training."""
policy_config = PolicyNetworkConfig.from_yaml(config_path)
policy_config.policy_type = "filtering"
policy_dataset_file = os.path.join(results_dir, "policy_dataset.ckpt")
datamodule = create_policy_dataset(
reaction_rules_path=reaction_rules,
molecules_or_reactions_path=molecule_data,
output_path=policy_dataset_file,
dataset_type="filtering",
batch_size=policy_config.batch_size,
num_cpus=num_cpus,
)
run_policy_training(datamodule, config=policy_config, results_path=results_dir)
@synplan.command(name="value_network_tuning")
@click.option(
"--config",
"config_path",
required=True,
type=click.Path(exists=True),
help="Path to the configuration file for value network training.",
)
@click.option(
"--targets",
required=True,
type=click.Path(exists=True),
help="Path to the file with target molecules for planning simulations.",
)
@click.option(
"--reaction_rules",
required=True,
type=click.Path(exists=True),
help="Path to the file with extracted reaction rules. Needed for planning simulations.",
)
@click.option(
"--building_blocks",
required=True,
type=click.Path(exists=True),
help="Path to the file with building blocks. Needed for planning simulations.",
)
@click.option(
"--policy_network",
required=True,
type=click.Path(exists=True),
help="Path to the file with trained policy network. Needed for planning simulations.",
)
@click.option(
"--value_network",
default=None,
type=click.Path(exists=True),
help="Path to the file with trained value network. Needed in case of additional value network fine-tuning",
)
@click.option(
"--results_dir",
default=".",
type=click.Path(exists=False),
help="Path to the directory where the trained value network will be stored.",
)
def value_network_tuning_cli(
config_path: str,
targets: str,
reaction_rules: str,
building_blocks: str,
policy_network: str,
value_network: str,
results_dir: str,
):
"""Value network tuning."""
with open(config_path, "r", encoding="utf-8") as file:
config = yaml.safe_load(file)
policy_config = PolicyNetworkConfig.from_dict(config["node_expansion"])
policy_config.weights_path = policy_network
value_config = ValueNetworkConfig.from_dict(config["value_network"])
if value_network is None:
value_config.weights_path = os.path.join(
results_dir, "weights", "value_network.ckpt"
)
tree_config = TreeConfig.from_dict(config["tree"])
tuning_config = TuningConfig.from_dict(config["tuning"])
run_updating(
targets_path=targets,
tree_config=tree_config,
policy_config=policy_config,
value_config=value_config,
reinforce_config=tuning_config,
reaction_rules_path=reaction_rules,
building_blocks_path=building_blocks,
results_root=results_dir,
)
@synplan.command(name="planning")
@click.option(
"--config",
"config_path",
required=True,
type=click.Path(exists=True),
help="Path to the configuration file for retrosynthetic planning.",
)
@click.option(
"--targets",
required=True,
type=click.Path(exists=True),
help="Path to the file with target molecules for retrosynthetic planning.",
)
@click.option(
"--reaction_rules",
required=True,
type=click.Path(exists=True),
help="Path to the file with extracted reaction rules.",
)
@click.option(
"--building_blocks",
required=True,
type=click.Path(exists=True),
help="Path to the file with building blocks.",
)
@click.option(
"--policy_network",
required=True,
type=click.Path(exists=True),
help="Path to the file with trained policy network.",
)
@click.option(
"--value_network",
default=None,
type=click.Path(exists=True),
help="Path to the file with trained value network.",
)
@click.option(
"--results_dir",
default=".",
type=click.Path(exists=False),
help="Path to the file where retrosynthetic planning results will be stored.",
)
def planning_cli(
config_path: str,
targets: str,
reaction_rules: str,
building_blocks: str,
policy_network: str,
value_network: str,
results_dir: str,
):
"""Retrosynthetic planning."""
with open(config_path, "r", encoding="utf-8") as file:
config = yaml.safe_load(file)
search_config = {**config["tree"], **config["node_evaluation"]}
policy_config = PolicyNetworkConfig.from_dict(
{**config["node_expansion"], **{"weights_path": policy_network}}
)
run_search(
targets_path=targets,
search_config=search_config,
policy_config=policy_config,
reaction_rules_path=reaction_rules,
building_blocks_path=building_blocks,
value_network_path=value_network,
results_root=results_dir,
)
@synplan.command(name="clustering")
@click.option(
"--targets",
required=True,
type=click.Path(exists=True),
help="Path to the file with target molecules for retrosynthetic planning.",
)
@click.option(
"--routes_file",
default=".",
type=click.Path(exists=False),
help="Path to the file where the planning results are stored.",
)
@click.option(
"--cluster_results_dir",
default=".",
type=click.Path(exists=False),
help="Path to the file where clustering results will be stored.",
)
@click.option(
"--perform_subcluster",
default=None,
type=click.Path(exists=False),
help="Perform subclustering.",
)
@click.option(
"--subcluster_results_dir",
default=".",
type=click.Path(exists=False),
help="Path to the file where subclustering results will be stored.",
)
def cluster_route_from_file_cli(
targets: str,
routes_file: str,
cluster_results_dir: str,
perform_subcluster: bool,
subcluster_results_dir: str,
):
"""Clustering the routes from planning"""
run_cluster_cli(
routes_file=routes_file,
cluster_results_dir=cluster_results_dir,
perform_subcluster=perform_subcluster,
subcluster_results_dir=subcluster_results_dir if perform_subcluster else None,
)
if __name__ == "__main__":
synplan()