Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pubchempy as pcp | |
| from rdkit import Chem | |
| from rdkit.Chem import Draw | |
| import selfies as sf | |
| import re | |
# Known polymers, keyed by lowercase name.
# Each value is (repeat-unit SMILES, monomer name): the SMILES marks the two
# chain attachment points with "[*]", and the monomer name is what gets
# looked up on PubChem to show the monomer structure.
polymer_dict = {
    "polyethylene": ("[*]CC[*]", "ethene"),
    "polypropylene": ("[*]C(C)C[*]", "propene"),
    "polystyrene": ("[*]CC(c1ccccc1)[*]", "styrene"),
    "polyvinyl chloride": ("[*]C(Cl)C[*]", "vinyl chloride"),
    # -CH2-CH(OH)-: the hydroxyl sits directly on the backbone carbon.
    # (A CH2OH side group would be poly(allyl alcohol), not PVA.)
    "polyvinyl alcohol": ("[*]C(O)C[*]", "vinyl alcohol"),
    "polymethyl methacrylate": ("[*]C(C)(C(=O)OC)C[*]", "methyl methacrylate"),
    "polyacrylonitrile": ("[*]C(C#N)C[*]", "acrylonitrile"),
    # -CH2-CH(OC(=O)CH3)-: the ester oxygen is bonded to the backbone.
    # (A C(=O)OCH3 side group would be poly(methyl acrylate), not PVAc.)
    "polyvinyl acetate": ("[*]C(OC(C)=O)C[*]", "vinyl acetate"),
}
def is_cas_number(text):
    """Return True if *text* is shaped like a CAS Registry Number.

    Surrounding whitespace is ignored. Only the layout is checked
    (2-7 digits, hyphen, 2 digits, hyphen, 1 digit); the trailing check
    digit is not validated.
    """
    candidate = text.strip()
    # Simple CAS-format regular expression.
    hit = re.match(r"^\d{2,7}-\d{2}-\d$", candidate)
    return hit is not None
def _first_hit_smiles(identifier, namespace):
    """Return a SMILES string for the first PubChem hit, or None.

    Queries PubChem through ``pcp.get_compounds(identifier, namespace)`` and
    inspects only the first result. The SMILES attributes are tried in
    order; if none is populated, the record's InChI (when present) is
    converted to SMILES with RDKit. Any exception is treated as "not found"
    so the caller can fall back to another lookup strategy.
    """
    try:
        hits = pcp.get_compounds(identifier, namespace)
        if not hits:
            return None
        first = hits[0]
        # Attribute names tried in the original priority order; getattr
        # with a default tolerates attributes the record does not expose.
        for attr in ("isomeric_smiles", "canonical_smiles", "smiles"):
            smiles = getattr(first, attr, None)
            if smiles:
                return smiles
        # Last resort: rebuild a SMILES from the record's InChI.
        inchi = getattr(first, "inchi", None)
        if inchi:
            mol = Chem.MolFromInchi(inchi)
            if mol:
                return Chem.MolToSmiles(mol)
    except Exception:
        # Best-effort lookup: network/API/conversion errors mean "no result".
        return None
    return None


def name_to_smiles_and_type(text):
    """Resolve a polymer name, CAS number, or compound name to a SMILES.

    Args:
        text: User-supplied name or CAS number.

    Returns:
        A 4-tuple ``(smiles, mol_type, name, monomer)``:

        * known polymer: ``(repeat_unit_smiles, 'polymer', name, monomer)``
        * PubChem hit: ``(smiles, 'small molecule', name, None)``
        * nothing found: ``(None, None, name, None)``

        ``name`` is always the stripped, lower-cased input.
    """
    query = text.strip()
    text_lower = query.lower()

    # 1. Check whether the input is a known polymer.
    if text_lower in polymer_dict:
        smiles, monomer = polymer_dict[text_lower]
        return smiles, 'polymer', text_lower, monomer

    # 2. If the input looks like a CAS number, try a registry-number lookup.
    # NOTE(review): 'rn' may not be a namespace PubChem accepts directly
    # (registry numbers appear to live under xref/RN) - confirm. A failure
    # here is swallowed and the name lookup below is tried instead.
    if is_cas_number(query):
        smiles = _first_hit_smiles(query, 'rn')
        if smiles:
            return smiles, 'small molecule', text_lower, None

    # 3. Ordinary lookup by name. The stripped text is sent so that stray
    #    whitespace cannot make an otherwise valid query miss.
    smiles = _first_hit_smiles(query, 'name')
    if smiles:
        return smiles, 'small molecule', text_lower, None

    return None, None, text_lower, None
def _polymer_result(smiles, monomer_name):
    """Build the four UI outputs for a polymer repeat unit and its monomer."""
    selfies_str = "(不支援聚合物片段 SMILES 轉 SELFIES)"
    msg = f"{smiles} (聚合物重複單元 SMILES)"
    monomer_msg = f"對應單體(monomer)名稱:{monomer_name}"

    # Look up the monomer's SMILES and draw its structure.
    monomer_smiles = ""
    img = None
    if monomer_name:
        try:
            hits = pcp.get_compounds(monomer_name, 'name')
            if hits:
                first = hits[0]
                monomer_smiles = (
                    getattr(first, "isomeric_smiles", None)
                    or getattr(first, "canonical_smiles", None)
                    or getattr(first, "smiles", None)
                )
        except Exception:
            monomer_smiles = "(查無單體 SMILES)"
        else:
            # Drawing is kept in its own try block so that a rendering
            # failure cannot discard a SMILES that was already retrieved.
            if monomer_smiles:
                try:
                    mol = Chem.MolFromSmiles(monomer_smiles)
                    if mol is not None:
                        img = Draw.MolToImage(mol, size=(300, 200))
                except Exception:
                    img = None

    extra_msg = f"{monomer_msg}\n單體 SMILES: {monomer_smiles if monomer_smiles else '(查無)'}"
    return f"{msg}\n\n{extra_msg}", "(聚合物:無標準 InChI)", selfies_str, img


def _small_molecule_result(smiles):
    """Build the four UI outputs (SMILES, InChI, SELFIES, image) for a molecule."""
    # Parse once and reuse the molecule for both InChI and the drawing.
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception:
        mol = None

    inchi = "(InChI 轉換失敗)"
    if mol is not None:
        try:
            inchi = Chem.MolToInchi(mol)
        except Exception:
            pass  # keep the failure placeholder

    try:
        selfies_str = sf.encoder(smiles)
    except Exception:
        selfies_str = "(SELFIES 轉換失敗)"

    img = None
    if mol is not None:
        try:
            img = Draw.MolToImage(mol, size=(300, 200))
        except Exception:
            img = None

    return smiles, inchi, selfies_str, img


def query_molecule(text):
    """Gradio callback: resolve *text* and return the four display values.

    Args:
        text: Molecule name, polymer name, or CAS number typed by the user.

    Returns:
        A 4-tuple ``(smiles_text, inchi_text, selfies_text, image)`` matching
        the interface outputs. ``image`` is a PIL image or None. Failures are
        reported through the text fields; this function does not raise.
    """
    # Blank input can never match anything; answer without a network call.
    if not text or not text.strip():
        return "查無此分子名稱或CAS No.", "", "", None
    try:
        smiles, mol_type, orig_name, monomer_name = name_to_smiles_and_type(text)
        if not smiles:
            return "查無此分子名稱或CAS No.", "", "", None
        if mol_type == 'polymer':
            return _polymer_result(smiles, monomer_name)
        return _small_molecule_result(smiles)
    except Exception as e:
        # Top-level UI boundary: surface the error instead of crashing.
        return f"系統錯誤: {e}", "錯誤", "錯誤", None
def _build_interface():
    """Assemble the Gradio interface: one text input, three text outputs and an image."""
    query_box = gr.Textbox(label="分子名稱或CAS號(如 styrene, 100-42-5, benzene, polystyrene...)")
    # Order must match the 4-tuple returned by query_molecule.
    result_widgets = [
        gr.Textbox(label="SMILES(或聚合物重複單元)"),
        gr.Textbox(label="InChI"),
        gr.Textbox(label="SELFIES"),
        gr.Image(type="pil", label="結構圖或單體結構圖"),
    ]
    blurb = (
        "輸入有機分子名稱、聚合物名稱,或 CAS No.,可顯示單體或聚合物重複單元 SMILES/SELFIES。\n"
        "聚合物僅顯示重複單元(不支援 SELFIES),自動顯示單體資訊。"
    )
    return gr.Interface(
        fn=query_molecule,
        inputs=query_box,
        outputs=result_widgets,
        title="分子/聚合物名稱/CAS號查詢互動展示",
        description=blurb,
    )


iface = _build_interface()
iface.launch()