Spaces:
Running
Running
| from rdkit import Chem | |
# Catalogue of toxicophore ("structural alert") substructures.
#
# Each entry maps a stable key to:
#   "smarts" - SMARTS query compiled with Chem.MolFromSmarts and matched
#              against molecules parsed (and therefore sanitized) by
#              Chem.MolFromSmiles;
#   "name"   - short human-readable label reported to the caller;
#   "detail" - one-sentence rationale reported to the caller.
#
# The patterns must describe the molecule as RDKit represents it *after*
# sanitization (charge-separated nitro groups, non-aromatic quinone rings,
# ...), otherwise they silently never match.
STRUCTURAL_ALERTS_DB = {
    "aromatic_amine": {
        "smarts": "c[NH2]",
        "name": "Primary Aromatic Amine",
        "detail": "Known to undergo metabolic activation forming mutagenic nitrenium ions.",
    },
    "alkyl_halide": {
        "smarts": "[CX4][Cl,Br,I]",
        "name": "Alkyl Halide",
        "detail": "Direct-acting alkylating agent; electrophilic risk of DNA damage.",
    },
    "hydrazine": {
        "smarts": "[NX3][NX3]",
        "name": "Hydrazine",
        "detail": "Associated with genotoxic risk and reactive oxygen species generation.",
    },
    "aldehyde": {
        "smarts": "[CX3H1](=O)[#6]",
        "name": "Aldehyde",
        "detail": "Highly reactive electrophile; risk of DNA-protein crosslinks.",
    },
    "nitro": {
        # Sanitized RDKit molecules carry nitro groups in the charge-separated
        # form, so the neutral pentavalent "[NX3](=O)=O" spelling never matches.
        # NOTE(review): the pattern does not require an aromatic attachment
        # even though the label says "Nitro Aromatic" - confirm the intent.
        "smarts": "[NX3+](=O)[O-]",
        "name": "Nitro Aromatic",
        "detail": "Undergoes metabolic reduction to reactive nitroso and hydroxylamine intermediates.",
    },
    "epoxide": {
        # Any saturated oxirane ring. "[CH]" would demand exactly one hydrogen
        # on each carbon and miss e.g. ethylene oxide and propylene oxide.
        "smarts": "[OX2]1[CX4][CX4]1",
        "name": "Epoxide",
        "detail": "Highly strained electrophilic ring; direct DNA alkylation risk.",
    },
    "quinone": {
        # para-Quinone core (cyclohexa-2,5-diene-1,4-dione). The ring carbons
        # are matched by element, and the two ring C=C bonds may be double or
        # aromatic so that fused systems (naphtho-/anthraquinones) also hit.
        # ortho-Quinones are not covered by this single pattern.
        "smarts": "O=[#6X3]1[#6X3]=,:[#6X3][#6X3](=O)[#6X3]=,:[#6X3]1",
        "name": "Quinone",
        "detail": "Redox-active species generating ROS and forming covalent adducts.",
    },
    "azide": {
        # Covalent azide R-N3: substituted N, cationic central N, terminal N.
        # "~" accepts either resonance spelling (N=[N+]=[N-] or [N-]-[N+]#N).
        "smarts": "[NX2]~[NX2+]~[NX1]",
        "name": "Azide",
        "detail": "Explosive and reactive; potential for bioorthogonal toxicity.",
    },
    "aflatoxin_b1_like": {
        # NOTE(review): this pattern describes an epoxide-fused lactone ring
        # system written with aliphatic atoms only; verify it against the
        # intended reference structure before relying on it.
        "smarts": "O=C1OC2C3OC3C=CC2=C1",
        "name": "Aflatoxin-like Furan",
        "detail": "Metabolic epoxidation leads to DNA adduct formation.",
    },
    "beta_lactam": {
        # Azetidin-2-one ring with any substitution on the two sp3 carbons
        # ("[CH]" would reject unsubstituted or disubstituted ring carbons).
        "smarts": "O=C1CCN1",
        "name": "Beta-Lactam",
        "detail": "Acylating agent; risk of hypersensitivity and covalent protein binding.",
    },
}
def identify_structural_alerts(smiles: str, high_attribution_indices: list = None):
    """Find every structural-alert substructure present in a molecule.

    Args:
        smiles: SMILES string of the molecule to screen.
        high_attribution_indices: Optional collection of atom indices that a
            model attribution step flagged as important. Any iterable of
            hashable indices is accepted (list, tuple, set, 1-D numpy array).
            ``None`` or an empty collection means no atom is flagged.

    Returns:
        A list with one dict per substructure match, in catalogue order, with
        keys ``"alert_name"``, ``"details"``, ``"matched_atoms"`` (list of
        atom indices of the match) and ``"high_attribution"`` (``True`` when
        at least one matched atom is in ``high_attribution_indices``).
        An empty list is returned when the SMILES cannot be parsed.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return []

    # Normalise once into a set: membership tests become O(1), and array-like
    # inputs no longer hit the ambiguous truth-value check that
    # ``if high_attribution_indices`` performs on multi-element numpy arrays.
    if high_attribution_indices is None:
        flagged_atoms = frozenset()
    else:
        flagged_atoms = frozenset(high_attribution_indices)

    alerts_found = []
    for alert in STRUCTURAL_ALERTS_DB.values():
        pattern = Chem.MolFromSmarts(alert["smarts"])
        if pattern is None:
            # Best effort: an unparsable catalogue entry is skipped rather
            # than aborting the whole screen.
            continue
        for match in mol.GetSubstructMatches(pattern):
            alerts_found.append({
                "alert_name": alert["name"],
                "details": alert["detail"],
                "matched_atoms": list(match),
                "high_attribution": not flagged_atoms.isdisjoint(match),
            })
    return alerts_found