import os
import json


class Evaluator():
    """Compute and store metrics for result files of one benchmark dataset.

    Supported ``dataset`` values are ``"smolinstruct"``, ``"openmolinst"``
    and ``"mumoinstruct"``. Result files are read from ``result_dir`` and the
    aggregated metrics are written to ``<result_dir>/metrics.json``.

    The metric implementations live in the project's ``utils.*_metrics``
    modules (and pandas for OpenMolInst). They are imported lazily, inside
    the methods that need them, so the class also works when this module is
    imported rather than executed as a script.
    """

    # Suffix expected on per-task result files.
    _RESULT_SUFFIX = ".json"

    def __init__(self, dataset, result_dir, raw_dir=""):
        """Store the configuration.

        Args:
            dataset: Name of the dataset to evaluate.
            result_dir: Directory containing the result JSON files.
            raw_dir: Directory containing the ``openmolinst_<task>.csv``
                files; only read when ``dataset`` is ``"openmolinst"``.
        """
        self.dataset = dataset
        self.result_dir = result_dir
        # Always defined so attribute access never fails on other datasets.
        self.raw_dir = raw_dir

    def _load_json(self, file_path):
        """Return the parsed content of the JSON file at ``file_path``."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _dump_json(self, content, file_path):
        """Write ``content`` as indented JSON to ``file_path``."""
        # ensure_ascii=False emits raw non-ASCII characters, so the file
        # encoding must be pinned instead of depending on the locale.
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(content, f, indent=4, ensure_ascii=False)

    def _get_pred_gold_smolinstruct(self, json_file):
        """Return ``(preds, gts)`` read from a SMolInstruct result file.

        Every entry of both lists is a one-element list wrapping the log's
        ``"extracted_answer"`` / ``"extracted_gt"`` value.
        """
        logs = self._load_json(json_file)
        preds = [[log["extracted_answer"]] for log in logs]
        gts = [[log["extracted_gt"]] for log in logs]
        return preds, gts

    def _get_pred_gold_openmolinst(self, csv_file, json_file):
        """Return ``(preds, gts)`` for an OpenMolInst task.

        ``gts`` is the DataFrame read from ``csv_file``; ``preds`` is the
        list of ``"extracted_answer"`` values from ``json_file``.
        """
        import pandas as pd

        gts = pd.read_csv(csv_file)
        preds = [log["extracted_answer"] for log in self._load_json(json_file)]
        return preds, gts

    def _get_pred_gold_mumoinstruct(self, json_file):
        """Group a MuMOInstruct result file by subtask.

        Returns:
            ``(input_smiles, input_props, output_smiles, output_props)``,
            four dicts keyed by ``log['metadata']['subtask']``. The first
            three hold one list entry per log; ``output_props`` holds
            whatever ``generate_props`` returns for the task's predictions.
        """
        from utils.mumoinstruct_metrics import generate_props

        input_smiles, input_props, output_smiles = dict(), dict(), dict()
        for log in self._load_json(json_file):
            metadata = log['metadata']
            task = metadata['subtask']
            answer = log["extracted_answer"]
            # A bare string answer is wrapped so every entry is a list.
            pred_smi = [answer] if isinstance(answer, str) else answer
            input_smiles.setdefault(task, []).append(metadata['source_smiles'])
            input_props.setdefault(task, []).append(metadata['source_props'])
            output_smiles.setdefault(task, []).append(pred_smi)

        output_props = {
            task: generate_props(smiles)
            for task, smiles in output_smiles.items()
        }
        return input_smiles, input_props, output_smiles, output_props

    def _eval_smolinstruct(self, json_file, task):
        """Compute the metrics of one SMolInstruct task file."""
        from utils.smolinstruct_metrics import (
            calculate_boolean_metrics,
            calculate_formula_metrics,
            calculate_number_metrics,
            calculate_smiles_metrics,
            calculate_text_metrics,
        )

        pred_list, gold_list = self._get_pred_gold_smolinstruct(json_file=json_file)
        if task in ('forward_synthesis', 'description_guided_generation', 'name_conversion-i2s'):
            r = calculate_smiles_metrics(pred_list, gold_list)
        elif task in ('retrosynthesis',):
            r = calculate_smiles_metrics(pred_list, gold_list, metrics=('exact_match', 'fingerprint', 'multiple_match'))
        elif task in ('molecule_captioning',):
            r = calculate_text_metrics(pred_list, gold_list)
        elif task in ('name_conversion-i2f', 'name_conversion-s2f'):
            r = calculate_formula_metrics(pred_list, gold_list, metrics=('element_match',))
        elif task in ('name_conversion-s2i',):
            r = calculate_formula_metrics(pred_list, gold_list, metrics=('split_match',))
        elif task in ('property_prediction-esol', 'property_prediction-lipo'):
            r = calculate_number_metrics(pred_list, gold_list)
        elif task in ('property_prediction-bbbp', 'property_prediction-clintox',
                      'property_prediction-hiv', 'property_prediction-sider'):
            r = calculate_boolean_metrics(pred_list, gold_list)
        else:
            raise ValueError(task)
        print(r)
        return r

    def _eval_openmolinst(self, json_file, task):
        """Compute the metrics of one OpenMolInst task file."""
        from utils.openmolinst_metrics import (
            eval_moledit_add_component,
            eval_moledit_delete_component,
            eval_moledit_sub_component,
            eval_molopt_logP,
            eval_molopt_MR,
            eval_molopt_QED,
        )

        metric_fns = {
            "moledit_add_component": eval_moledit_add_component,
            "moledit_delete_component": eval_moledit_delete_component,
            "moledit_sub_component": eval_moledit_sub_component,
            "molopt_logP": eval_molopt_logP,
            "molopt_MR": eval_molopt_MR,
            "molopt_QED": eval_molopt_QED,
        }
        # Validate first: an unknown (or missing) task should surface as
        # ValueError rather than as a failure while building the CSV path.
        if task not in metric_fns:
            raise ValueError(task)

        preds, golds = self._get_pred_gold_openmolinst(
            csv_file=os.path.join(self.raw_dir, "openmolinst_" + task + ".csv"),
            json_file=json_file,
        )
        r = metric_fns[task](data=golds, target=preds)
        print(r)
        return r

    def _eval_mumoinstruct(self, json_file):
        """Compute the metrics of every subtask in a MuMOInstruct file."""
        from utils.mumoinstruct_metrics import compute_metrics

        input_smiles, input_props, output_smiles, output_props = \
            self._get_pred_gold_mumoinstruct(json_file)
        results = dict()
        for task in input_smiles:
            print(f"\n###### {task} ######")
            # compute_metrics returns a pair; only the first item is kept.
            r, _ = compute_metrics(input_smiles=input_smiles[task],
                                   input_props=input_props[task],
                                   output_smiles=output_smiles[task],
                                   output_props_df=output_props[task],
                                   task=task,
                                   normalize=None)
            print(r)
            results[task] = r
        return results

    def eval(self, json_file, task=None):
        """Evaluate one result file according to ``self.dataset``.

        Args:
            json_file: Path to the result JSON file.
            task: Task name; required for ``"smolinstruct"`` and
                ``"openmolinst"``, ignored for ``"mumoinstruct"`` (tasks are
                read from the file itself).

        Returns:
            The metric result for the task, or a ``{subtask: metrics}`` dict
            for ``"mumoinstruct"``. ``None`` for an unrecognised dataset.

        Raises:
            ValueError: If ``task`` is not a known task of the dataset.
        """
        if self.dataset == "smolinstruct":
            return self._eval_smolinstruct(json_file, task)
        if self.dataset == "openmolinst":
            return self._eval_openmolinst(json_file, task)
        if self.dataset == "mumoinstruct":
            return self._eval_mumoinstruct(json_file)
        return None

    def _evaluate_task_files(self, dataset_name):
        """Evaluate every ``*<dataset_name>_<task>.json`` file and dump metrics."""
        suffix = self._RESULT_SUFFIX
        results = dict()
        # os.listdir order is arbitrary; sort for reproducible output.
        for file_name in sorted(os.listdir(self.result_dir)):
            if dataset_name not in file_name or not file_name.endswith(suffix):
                continue
            task = file_name[:-len(suffix)].replace(dataset_name + "_", "")
            print(f"\n###### {task} ######")
            results[task] = self.eval(
                json_file=os.path.join(self.result_dir, file_name), task=task)
        self._dump_json(results, os.path.join(self.result_dir, "metrics.json"))

    def evaluate_smolinstruct(self):
        """Evaluate all SMolInstruct result files in ``result_dir``."""
        self._evaluate_task_files("smolinstruct")

    def evaluate_openmolinst(self):
        """Evaluate all OpenMolInst result files in ``result_dir``."""
        self._evaluate_task_files("openmolinst")

    def evaluate_mumoinstruct(self):
        """Evaluate all MuMOInstruct result files in ``result_dir``.

        Metrics of all matching files are merged into one dict (a later file
        wins on a duplicate subtask). An empty dict is written when no file
        matches.
        """
        results = dict()
        for file_name in sorted(os.listdir(self.result_dir)):
            if "mumoinstruct" not in file_name:
                continue
            results.update(
                self.eval(json_file=os.path.join(self.result_dir, file_name)))
        self._dump_json(results, os.path.join(self.result_dir, "metrics.json"))

    def run(self):
        """Run the evaluation matching ``self.dataset``; no-op if unknown."""
        if self.dataset == "smolinstruct":
            self.evaluate_smolinstruct()
        elif self.dataset == "openmolinst":
            self.evaluate_openmolinst()
        elif self.dataset == "mumoinstruct":
            self.evaluate_mumoinstruct()


def main():
    """Parse the command line and run the requested evaluation."""
    import argparse

    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--dataset_name", type=str, help="name of the dataset")
    parser.add_argument("--result_dir", type=str, help="path to result files")
    parser.add_argument("--raw_dir", type=str, default="evaluation/datasets/openmolinst_raw",
                        help="path to raw files (for OpenMolInst)")
    args = parser.parse_args()

    # raw_dir is only read for OpenMolInst, so passing it always is harmless.
    workflow = Evaluator(dataset=args.dataset_name,
                         result_dir=args.result_dir,
                         raw_dir=args.raw_dir)
    workflow.run()


if __name__ == "__main__":
    main()