# Presentation / app.py
# Kung-Hsun's picture
# Update app.py
# 1b340d8 verified
import gradio as gr
import pubchempy as pcp
from rdkit import Chem
from rdkit.Chem import Draw
import selfies as sf
import re
# Lookup table for common vinyl polymers.
# Key:   lower-case polymer name (matched against the lower-cased, stripped
#        user input).
# Value: (repeat-unit SMILES with "[*]" marking the two chain attachment
#        points, English name of the corresponding monomer).
polymer_dict = {
    "polyethylene": ("[*]CC[*]", "ethene"),
    "polypropylene": ("[*]C(C)C[*]", "propene"),
    "polystyrene": ("[*]CC(c1ccccc1)[*]", "styrene"),
    "polyvinyl chloride": ("[*]C(Cl)C[*]", "vinyl chloride"),
    # The hydroxyl sits directly on the backbone carbon: -CH2-CH(OH)-.
    # (A "C(CO)" pendant would be -CH2OH, i.e. poly(allyl alcohol).)
    "polyvinyl alcohol": ("[*]C(O)C[*]", "vinyl alcohol"),
    "polymethyl methacrylate": ("[*]C(C)(C(=O)OC)C[*]", "methyl methacrylate"),
    "polyacrylonitrile": ("[*]C(C#N)C[*]", "acrylonitrile"),
    # Acetate ester is attached through oxygen: -CH2-CH(O-C(=O)-CH3)-.
    # (A "C(C(=O)OC)" pendant would be poly(methyl acrylate).)
    "polyvinyl acetate": ("[*]C(OC(=O)C)C[*]", "vinyl acetate"),
}
def is_cas_number(text):
    """Return True when *text* has the shape of a CAS Registry Number.

    Surrounding whitespace is ignored. Only the digit grouping
    (2-7 digits, 2 digits, 1 digit, separated by hyphens) is checked;
    the check digit itself is not validated.
    """
    candidate = text.strip()
    # Simple CAS-format regular expression.
    found = re.match(r"^\d{2,7}-\d{2}-\d$", candidate)
    return found is not None
def _lookup_pubchem_smiles(identifier, namespace):
    """Return a SMILES string for the first PubChem hit, or None.

    Queries PubChem through ``pcp.get_compounds(identifier, namespace)`` and
    inspects only the first returned compound. The SMILES attributes are
    tried in order of preference; if none is populated, the compound's InChI
    is converted to SMILES with RDKit instead.

    The lookup is deliberately best-effort: any exception (network failure,
    rejected request, conversion error) yields None so the caller can fall
    back to another lookup strategy.
    """
    try:
        compounds = pcp.get_compounds(identifier, namespace)
        if not compounds:
            return None
        first_hit = compounds[0]
        # Attribute names differ between pubchempy versions, so probe each.
        for attr in ("isomeric_smiles", "canonical_smiles", "smiles"):
            smiles = getattr(first_hit, attr, None)
            if smiles:
                return smiles
        inchi = getattr(first_hit, "inchi", None)
        if inchi:
            mol = Chem.MolFromInchi(inchi)
            if mol:
                return Chem.MolToSmiles(mol)
    except Exception:
        # Best-effort lookup: treat every failure as "not found".
        return None
    return None


def name_to_smiles_and_type(text):
    """Resolve a polymer name, CAS number or compound name to SMILES.

    Resolution order:
      1. exact (case-insensitive) match in ``polymer_dict``;
      2. PubChem lookup as a CAS number, when the input has CAS format;
      3. PubChem lookup by name.

    Args:
        text: user input; surrounding whitespace is ignored. ``None`` and
            blank input are treated as "not found" without any network call.

    Returns:
        A 4-tuple ``(smiles, mol_type, normalized_name, monomer_name)``:
        ``mol_type`` is ``'polymer'`` or ``'small molecule'``;
        ``monomer_name`` is set only for polymers. When nothing is found the
        result is ``(None, None, normalized_name, None)``.
    """
    raw = text or ""
    query = raw.strip()
    text_lower = raw.lower().strip()

    # Nothing to look up: skip the dictionary and the PubChem round trips.
    if not query:
        return None, None, text_lower, None

    # 1. Known polymer?
    if text_lower in polymer_dict:
        smiles, monomer = polymer_dict[text_lower]
        return smiles, 'polymer', text_lower, monomer

    # 2. Input shaped like a CAS number.
    # NOTE(review): confirm that 'rn' is a namespace PubChem accepts for
    # compound lookups. If the request is rejected, the error is swallowed
    # by the helper and step 3 is tried with the same identifier.
    if is_cas_number(query):
        smiles = _lookup_pubchem_smiles(query, 'rn')
        if smiles:
            return smiles, 'small molecule', text_lower, None

    # 3. Generic name lookup (the stripped query is sent, not the raw text).
    smiles = _lookup_pubchem_smiles(query, 'name')
    if smiles:
        return smiles, 'small molecule', text_lower, None

    return None, None, text_lower, None
def _parse_smiles(smiles):
    """Parse *smiles* with RDKit; return the molecule or None on any failure."""
    try:
        return Chem.MolFromSmiles(smiles)
    except Exception:
        return None


def _draw_molecule(mol):
    """Render *mol* as a 300x200 image; return None if absent or drawing fails."""
    if not mol:
        return None
    try:
        return Draw.MolToImage(mol, size=(300, 200))
    except Exception:
        return None


def _describe_polymer(smiles, monomer_name):
    """Build the four UI outputs for a polymer repeat unit."""
    selfies_str = "(不支援聚合物片段 SMILES 轉 SELFIES)"
    msg = f"{smiles} (聚合物重複單元 SMILES)"
    monomer_msg = f"對應單體(monomer)名稱:{monomer_name}"

    # Look up the monomer's SMILES, then draw its structure.
    monomer_smiles = ""
    img = None
    if monomer_name:
        try:
            c = pcp.get_compounds(monomer_name, 'name')
            if c:
                first_hit = c[0]
                monomer_smiles = (
                    getattr(first_hit, "isomeric_smiles", None)
                    or getattr(first_hit, "canonical_smiles", None)
                    or getattr(first_hit, "smiles", None)
                )
        except Exception:
            monomer_smiles = "(查無單體 SMILES)"
        else:
            # Drawing is kept outside the lookup's try block so that a
            # rendering failure cannot overwrite a SMILES that was found.
            if monomer_smiles:
                img = _draw_molecule(_parse_smiles(monomer_smiles))

    extra_msg = f"{monomer_msg}\n單體 SMILES: {monomer_smiles if monomer_smiles else '(查無)'}"
    return f"{msg}\n\n{extra_msg}", "(聚合物:無標準 InChI)", selfies_str, img


def _describe_small_molecule(smiles):
    """Build the four UI outputs for a small molecule."""
    # Parse once and reuse the molecule for both InChI and the image.
    mol = _parse_smiles(smiles)

    inchi = "(InChI 轉換失敗)"
    if mol:
        try:
            # An empty result is reported as a failure as well.
            inchi = Chem.MolToInchi(mol) or inchi
        except Exception:
            pass  # keep the failure placeholder

    try:
        selfies_str = sf.encoder(smiles)
    except Exception:
        selfies_str = "(SELFIES 轉換失敗)"

    return smiles, inchi, selfies_str, _draw_molecule(mol)


def query_molecule(text):
    """Gradio callback: resolve *text* and return the values to display.

    Args:
        text: a molecule name, polymer name or CAS number typed by the user.

    Returns:
        A 4-tuple ``(smiles_text, inchi_text, selfies_text, image)`` matching
        the interface's four outputs. ``image`` is an image object or None.
        For polymers, the first element also carries the monomer name and
        SMILES, and the image shows the monomer. Individual conversion
        failures are reported through placeholder strings; any unexpected
        exception is turned into an error tuple rather than propagated.
    """
    try:
        smiles, mol_type, _orig_name, monomer_name = name_to_smiles_and_type(text)
        if not smiles:
            return "查無此分子名稱或CAS No.", "", "", None
        if mol_type == 'polymer':
            return _describe_polymer(smiles, monomer_name)
        return _describe_small_molecule(smiles)
    except Exception as e:
        # Top-level UI boundary: show the error instead of crashing the app.
        return f"系統錯誤: {e}", "錯誤", "錯誤", None
# Single free-text input accepting a molecule name, polymer name or CAS number.
_query_box = gr.Textbox(label="分子名稱或CAS號(如 styrene, 100-42-5, benzene, polystyrene...)")

# Output widgets, in the same order as the tuple returned by query_molecule.
_result_widgets = [
    gr.Textbox(label="SMILES(或聚合物重複單元)"),
    gr.Textbox(label="InChI"),
    gr.Textbox(label="SELFIES"),
    gr.Image(type="pil", label="結構圖或單體結構圖"),
]

# Two-line description shown under the title.
_description = (
    "輸入有機分子名稱、聚合物名稱,或 CAS No.,可顯示單體或聚合物重複單元 SMILES/SELFIES。\n"
    "聚合物僅顯示重複單元(不支援 SELFIES),自動顯示單體資訊。"
)

iface = gr.Interface(
    fn=query_molecule,
    inputs=_query_box,
    outputs=_result_widgets,
    title="分子/聚合物名稱/CAS號查詢互動展示",
    description=_description,
)

# Start the web UI as soon as the module is executed.
iface.launch()