| from rdkit import Chem |
| from rdkit.Chem import ( |
| Descriptors, rdMolDescriptors, Crippen, Lipinski, QED, AllChem, |
| ChemicalFeatures |
| ) |
| from rdkit.ML.Descriptors import MoleculeDescriptors |
| from rdkit.Chem import rdMolDescriptors as rdmd |
| import numpy as np |
| import os |
|
|
|
|
def load_feature_factory():
    """Build a feature factory from RDKit's ``BaseFeatures.fdef`` file.

    The definition file is located inside ``RDConfig.RDDataDir`` and passed to
    ``ChemicalFeatures.BuildFeatureFactory``. A new factory is built on every
    call.

    Returns:
        The object returned by ``ChemicalFeatures.BuildFeatureFactory``.
    """
    # Imported lazily, as in the original, so RDConfig is only needed when a
    # factory is actually requested.
    from rdkit import RDConfig

    return ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
    )
|
|
|
|
def compute_gasteiger_stats(mol):
    """Summarize the Gasteiger partial charges of a molecule.

    Charges are assigned with ``AllChem.ComputeGasteigerCharges`` and read back
    from each atom's ``_GasteigerCharge`` property, then reduced to their mean,
    maximum and minimum.

    Args:
        mol: RDKit molecule. Its atoms are annotated in place by the charge
            calculation.

    Returns:
        ``{"mean": float, "max": float, "min": float}``, or ``None`` when the
        charges cannot be computed, the molecule has no atoms, or any charge
        is NaN/infinite.
    """
    try:
        AllChem.ComputeGasteigerCharges(mol)
        charges = np.array(
            [atom.GetDoubleProp("_GasteigerCharge") for atom in mol.GetAtoms()],
            dtype=float,
        )
    except Exception:
        # Deliberately best-effort: any failure while assigning or reading the
        # charges means "no statistics available" rather than a crash.
        return None

    # A non-finite charge does not raise, so it must be checked explicitly.
    # Otherwise the mean becomes NaN and Python's max()/min() give results
    # that depend on where the NaN sits in the list.
    # NOTE(review): RDKit is reported to yield NaN charges for atoms the
    # Gasteiger model has no parameters for - confirm against the RDKit docs.
    if charges.size == 0 or not np.isfinite(charges).all():
        return None

    return {
        "mean": float(charges.mean()),
        "max": float(charges.max()),
        "min": float(charges.min()),
    }
|
|
|
|
def compute_morgan_fp(mol, radius=2, n_bits=2048):
    """Compute a Morgan bit-vector fingerprint and report how many bits are set.

    Args:
        mol: RDKit molecule to fingerprint.
        radius: Morgan radius forwarded to RDKit (default 2).
        n_bits: Length of the bit vector forwarded as ``nBits`` (default 2048).

    Returns:
        dict with ``"n_bits"`` (the requested vector length) and ``"bits_on"``
        (the number of set bits in the fingerprint).
    """
    # NOTE(review): recent RDKit releases mark GetMorganFingerprintAsBitVect as
    # deprecated in favor of rdFingerprintGenerator.GetMorganGenerator; consider
    # migrating once the minimum supported RDKit version is confirmed.
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=n_bits)
    # Ask the bit vector for its population count instead of materializing an
    # n_bits-character string only to count the "1" characters in it.
    return {
        "n_bits": n_bits,
        "bits_on": fp.GetNumOnBits(),
    }
|
|
|
|
def compute_extra_descriptors(mol, n_show=10):
    """Compute the leading entries of RDKit's descriptor list for a molecule.

    Args:
        mol: RDKit molecule to describe.
        n_show: Slice bound applied to ``Descriptors._descList``; with the
            default of 10 the first ten descriptors, in that list's order,
            are reported.

    Returns:
        dict with ``"names"`` (the selected descriptor names) and ``"values"``
        (the corresponding values from ``CalcDescriptors``), index-aligned.
    """
    # Select the names first so only the reported descriptors are evaluated,
    # rather than computing the whole list and discarding all but a slice.
    selected = [name for name, _ in Descriptors._descList][:n_show]
    calc = MoleculeDescriptors.MolecularDescriptorCalculator(selected)
    return {
        "names": calc.GetDescriptorNames(),
        "values": calc.CalcDescriptors(mol),
    }
|
|
|
|
def get_molecule_properties(smiles):
    """Return descriptors and properties of the molecule given as SMILES.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        dict with the keys ``"smiles"`` (the input string), ``"base"``
        (atom/bond counts, canonical SMILES, ring counts), ``"physchem"``,
        ``"charges"`` (Gasteiger statistics or ``None``), ``"pharmacophore"``,
        ``"qed"`` (``None`` if the QED calculation fails), ``"lipinski"``,
        ``"fingerprint"`` and ``"extra_descriptors"``.

    Raises:
        ValueError: if RDKit cannot build a molecule from ``smiles``.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError("Не удалось создать молекулу из SMILES.")

    # Explicit hydrogens are added on a separate copy. Descriptors, the Morgan
    # fingerprint and feature perception are computed on the molecule as parsed:
    # explicit H atoms would become extra graph nodes and change atom degrees,
    # skewing fingerprint bits and degree-based pattern matches.
    mol_h = Chem.AddHs(mol)

    base = {
        # Atom and bond counts keep including hydrogens, as before.
        "n_atoms": mol_h.GetNumAtoms(),
        "n_bonds": mol_h.GetNumBonds(),
        "canonical_smiles": Chem.MolToSmiles(mol, canonical=True),
        "aromatic_rings": rdmd.CalcNumAromaticRings(mol),
        "aliphatic_rings": rdmd.CalcNumAliphaticRings(mol),
        "heterocycles": rdmd.CalcNumHeterocycles(mol),
    }

    physchem = {
        "mol_weight": Descriptors.MolWt(mol),
        "logp": Crippen.MolLogP(mol),
        "tpsa": Descriptors.TPSA(mol),
        "h_donors": Lipinski.NumHDonors(mol),
        "h_acceptors": Lipinski.NumHAcceptors(mol),
        "rotatable_bonds": Descriptors.NumRotatableBonds(mol),
        "fraction_csp3": rdMolDescriptors.CalcFractionCSP3(mol),
    }

    # Partial charges are per-atom quantities, so they are taken over all
    # atoms including the explicit hydrogens.
    charges = compute_gasteiger_stats(mol_h)

    factory = load_feature_factory()
    feats = factory.GetFeaturesForMol(mol)
    pharm = {
        "n_features": len(feats),
        "types": sorted({f.GetFamily() for f in feats}),
    }

    # QED is optional: a failure is reported as None instead of aborting the
    # whole property report.
    try:
        qed_val = QED.qed(mol)
    except Exception:
        qed_val = None

    # Each rule is evaluated once; the overall verdict is derived from the
    # per-rule flags so the two can never disagree.
    lipinski = {
        "mw_ok": physchem["mol_weight"] <= 500,
        "logp_ok": physchem["logp"] <= 5,
        "h_donors_ok": physchem["h_donors"] <= 5,
        "h_acceptors_ok": physchem["h_acceptors"] <= 10,
    }
    lipinski["pass_"] = all(lipinski.values())

    fp = compute_morgan_fp(mol)
    extra = compute_extra_descriptors(mol)

    return {
        "smiles": smiles,
        "base": base,
        "physchem": physchem,
        "charges": charges,
        "pharmacophore": pharm,
        "qed": qed_val,
        "lipinski": lipinski,
        "fingerprint": fp,
        "extra_descriptors": extra,
    }
|
|
| |
if __name__ == "__main__":
    # Demo run: print every section of the property report for one
    # hard-coded SMILES string (acetylsalicylic acid).
    report = get_molecule_properties("CC(=O)OC1=CC=CC=C1C(=O)O")
    for title, payload in report.items():
        # One call with a newline separator emits the same two lines as
        # printing the header and the value separately.
        print(f"\n=== {title.upper()} ===", payload, sep="\n")
|
|