Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| from pathlib import Path | |
| from mmpdblib.fragment_io import read_fragment_records | |
| from rdkit import Chem | |
def fragmentize_molecule(smiles_string, max_ratio=0.5):
    """Fragment one molecule with the ``mmpdb fragment`` CLI and keep the small fragments.

    The SMILES is written to a one-record ``.smi`` file (record id
    ``"Molecule"``), ``mmpdb fragment`` is run on it, and the resulting
    fragment records are read back with ``read_fragment_records``.

    A fragmentation is kept only when the heavy-atom count of its variable
    part is strictly less than ``max_ratio`` times the heavy-atom count of
    the record's normalized SMILES.

    Args:
        smiles_string: SMILES of the molecule to fragment.
        max_ratio: Upper bound (exclusive) on the size of the variable part,
            as a fraction of the whole molecule's heavy-atom count.

    Returns:
        A ``pandas.DataFrame`` with one row per retained fragmentation and
        the columns ``variable_smiles``, ``constant_smiles``, ``record_id``,
        ``normalized_smiles`` and ``attachment_order``. The columns are
        present even when no fragmentation is retained.

    Raises:
        FileNotFoundError: If the ``mmpdb`` executable is not on ``PATH``.
        subprocess.CalledProcessError: If ``mmpdb fragment`` exits with a
            non-zero status.
    """
    # Imported locally so this function does not depend on file-level
    # imports beyond the ones it already used.
    import subprocess
    import tempfile

    columns = [
        'variable_smiles',
        'constant_smiles',
        'record_id',
        'normalized_smiles',
        'attachment_order',
    ]
    fragment_list = []

    # A private scratch directory per call: concurrent calls cannot clobber
    # each other's files, and everything is removed even if a step raises.
    with tempfile.TemporaryDirectory(prefix="mmpdb_fragment_") as work_dir:
        input_file = os.path.join(work_dir, "temp_input.smi")
        output_file = os.path.join(work_dir, "temp_output.fragments")

        # Write the SMILES string as a single tab-separated record.
        with open(input_file, "w") as f:
            f.write(smiles_string + "\t" + "Molecule" + "\n")

        # Run the mmpdb fragmentation tool. An argument list (no shell)
        # avoids quoting problems, and check=True surfaces a failed run
        # immediately instead of as a confusing read error further down.
        subprocess.run(
            ["mmpdb", "fragment", input_file, "-o", output_file],
            check=True,
        )

        # Read the fragments while the scratch directory still exists.
        fragment_reader = read_fragment_records(output_file)
        for record in fragment_reader:
            fragments = record.fragments
            if not fragments:
                # Nothing to filter for this record; also avoids touching
                # normalized_smiles on records that produced no fragments.
                continue

            # The parent molecule's size is the same for every fragment of
            # this record, so compute the threshold once.
            size_limit = count_heavy_atoms(record.normalized_smiles) * max_ratio

            for frag in fragments:
                if count_heavy_atoms(frag.variable_smiles) < size_limit:
                    fragment_list.append({
                        'variable_smiles': frag.variable_smiles,
                        'constant_smiles': frag.constant_smiles,
                        'record_id': record.id,
                        'normalized_smiles': record.normalized_smiles,
                        'attachment_order': frag.attachment_order,
                    })

    # Passing the column names keeps the schema stable for empty results.
    return pd.DataFrame(fragment_list, columns=columns)
def count_heavy_atoms(smiles):
    """Return the heavy-atom count RDKit reports for *smiles*.

    The SMILES is parsed with ``Chem.MolFromSmiles``; when parsing yields
    no usable molecule the count is reported as 0.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if not parsed:
        # Parsing failed: treat the input as having no heavy atoms.
        return 0
    return parsed.GetNumHeavyAtoms()
| # Example usage | |
| # smiles = "O=C1CCCC2=C1C1(CCS(=O)(=O)C1)N=C(Nc1nc3ccccc3o1)N2" | |
| # fragment_df = fragmentize_molecule(smiles) | |
| # print(fragment_df) | |