Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,42 +17,61 @@ polymer_dict = {
|
|
| 17 |
}
|
| 18 |
|
| 19 |
def is_cas_number(text):
    """Return True if *text* is formatted like a CAS Registry Number.

    The accepted shape is ``<2-7 digits>-<2 digits>-<1 digit>`` (for example
    ``7732-18-5``), ignoring surrounding whitespace.  Only the format is
    checked; the trailing check digit is not validated.

    Non-string input (such as ``None``) is reported as "not a CAS number"
    instead of raising.
    """
    if not isinstance(text, str):
        return False
    # re.ASCII restricts \d to 0-9; without it, digits from other scripts
    # (e.g. Arabic-Indic) would also be accepted as a CAS number.
    pattern = r"\d{2,7}-\d{2}-\d"
    return re.fullmatch(pattern, text.strip(), flags=re.ASCII) is not None
|
| 21 |
|
| 22 |
def name_to_smiles_and_type(text):
|
| 23 |
text_lower = text.lower().strip()
|
|
|
|
| 24 |
if text_lower in polymer_dict:
|
| 25 |
smiles, monomer = polymer_dict[text_lower]
|
| 26 |
-
return smiles, 'polymer',
|
|
|
|
| 27 |
if is_cas_number(text):
|
| 28 |
try:
|
| 29 |
c = pcp.get_compounds(text, 'rn')
|
| 30 |
if c and len(c) > 0:
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
except Exception:
|
| 35 |
pass
|
|
|
|
| 36 |
try:
|
| 37 |
c = pcp.get_compounds(text, 'name')
|
| 38 |
if c and len(c) > 0:
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
except Exception:
|
| 43 |
pass
|
| 44 |
-
return None, None,
|
| 45 |
|
| 46 |
def query_molecule(text):
|
| 47 |
try:
|
| 48 |
-
smiles, mol_type,
|
| 49 |
-
# 查無
|
| 50 |
if not smiles:
|
| 51 |
-
return "查無此分子名稱或CAS No.", "", "",
|
| 52 |
-
|
| 53 |
if mol_type == 'polymer':
|
| 54 |
selfies_str = "(不支援聚合物片段 SMILES 轉 SELFIES)"
|
| 55 |
msg = f"{smiles} (聚合物重複單元 SMILES)"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
img = None
|
| 57 |
if monomer_name:
|
| 58 |
try:
|
|
@@ -64,10 +83,9 @@ def query_molecule(text):
|
|
| 64 |
if mol:
|
| 65 |
img = Draw.MolToImage(mol, size=(300,200))
|
| 66 |
except Exception:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
return msg, "(聚合物:無標準 InChI)", selfies_str,
|
| 70 |
-
# 小分子
|
| 71 |
else:
|
| 72 |
try:
|
| 73 |
inchi = Chem.MolToInchi(Chem.MolFromSmiles(smiles))
|
|
@@ -82,12 +100,9 @@ def query_molecule(text):
|
|
| 82 |
img = Draw.MolToImage(mol, size=(300,200)) if mol else None
|
| 83 |
except Exception:
|
| 84 |
img = None
|
| 85 |
-
|
| 86 |
-
mw = f"{mw:.3f}" if mw else ""
|
| 87 |
-
# **一定 return 5 個欄位**
|
| 88 |
-
return smiles, inchi, selfies_str, mw, img
|
| 89 |
except Exception as e:
|
| 90 |
-
return f"系統錯誤: {e}", "錯誤", "錯誤",
|
| 91 |
|
| 92 |
iface = gr.Interface(
|
| 93 |
fn=query_molecule,
|
|
@@ -96,11 +111,13 @@ iface = gr.Interface(
|
|
| 96 |
gr.Textbox(label="SMILES(或聚合物重複單元)"),
|
| 97 |
gr.Textbox(label="InChI"),
|
| 98 |
gr.Textbox(label="SELFIES"),
|
| 99 |
-
gr.Textbox(label="分子量"),
|
| 100 |
gr.Image(type="pil", label="結構圖或單體結構圖"),
|
| 101 |
],
|
| 102 |
-
title="分子/聚合物
|
| 103 |
-
description=
|
|
|
|
|
|
|
|
|
|
| 104 |
)
|
| 105 |
|
| 106 |
iface.launch()
|
|
|
|
| 17 |
}
|
| 18 |
|
| 19 |
def is_cas_number(text):
    """Return True if *text* is formatted like a CAS Registry Number.

    This is a simple format check: ``<2-7 digits>-<2 digits>-<1 digit>``
    (for example ``7732-18-5``), ignoring surrounding whitespace.  The
    trailing check digit is not validated.

    Non-string input (such as ``None``) is reported as "not a CAS number"
    instead of raising.
    """
    if not isinstance(text, str):
        return False
    # re.ASCII restricts \d to 0-9; without it, digits from other scripts
    # (e.g. Arabic-Indic) would also be accepted as a CAS number.
    pattern = r"\d{2,7}-\d{2}-\d"
    return re.fullmatch(pattern, text.strip(), flags=re.ASCII) is not None
|
| 22 |
|
| 23 |
def _first_compound_smiles(identifier, namespace):
    """Return a SMILES string for the first PubChem hit, or None.

    Queries ``pcp.get_compounds(identifier, namespace)`` and inspects only the
    first result.  The SMILES attributes are tried in order of preference; if
    none is populated, the SMILES is rebuilt from the compound's InChI via
    RDKit.  Any failure (lookup error, unparsable InChI) yields ``None``.
    """
    try:
        compounds = pcp.get_compounds(identifier, namespace)
        if not compounds:
            return None
        first = compounds[0]
        # Attribute availability differs between records, hence the
        # getattr-with-default probing.
        for attr in ("isomeric_smiles", "canonical_smiles", "smiles"):
            smiles = getattr(first, attr, None)
            if smiles:
                return smiles
        # Fallback: derive the SMILES from the InChI when no SMILES
        # attribute is populated.
        inchi = getattr(first, "inchi", None)
        if inchi:
            mol = Chem.MolFromInchi(inchi)
            if mol:
                return Chem.MolToSmiles(mol) or None
    except Exception:
        # Deliberate best-effort: a failed lookup must not abort the query,
        # the caller simply moves on to its next strategy.
        return None
    return None


def name_to_smiles_and_type(text):
    """Resolve a polymer name, CAS number or compound name to a SMILES.

    Resolution order:
      1. ``polymer_dict`` lookup on the lower-cased, stripped text.
      2. If the text looks like a CAS number, a PubChem lookup by registry
         number.
      3. A PubChem lookup by name.

    Args:
        text: The user-supplied query string.

    Returns:
        A 4-tuple ``(smiles, mol_type, normalized_name, monomer)``:
        ``mol_type`` is ``'polymer'`` or ``'small molecule'``; ``monomer`` is
        only set for polymers.  When nothing is found (including empty or
        non-string input) the tuple is ``(None, None, normalized_name, None)``.
    """
    query = text.strip() if isinstance(text, str) else ""
    text_lower = query.lower()

    # Nothing to look up: skip the dictionary and the network round-trips.
    if not query:
        return None, None, text_lower, None

    # 1. Known polymer?
    if text_lower in polymer_dict:
        smiles, monomer = polymer_dict[text_lower]
        return smiles, 'polymer', text_lower, monomer

    # 2. CAS number lookup first (when applicable), 3. then a plain name
    #    lookup.
    # NOTE(review): 'rn' is the namespace the original code passed; confirm
    # that pubchempy.get_compounds accepts it.  If the request is rejected,
    # the error is swallowed and the name lookup acts as the fallback.
    namespaces = ('rn', 'name') if is_cas_number(query) else ('name',)
    for namespace in namespaces:
        # The stripped query is sent so stray whitespace cannot spoil a match.
        smiles = _first_compound_smiles(query, namespace)
        if smiles:
            return smiles, 'small molecule', text_lower, None

    return None, None, text_lower, None
|
| 61 |
|
| 62 |
def query_molecule(text):
|
| 63 |
try:
|
| 64 |
+
smiles, mol_type, orig_name, monomer_name = name_to_smiles_and_type(text)
|
|
|
|
| 65 |
if not smiles:
|
| 66 |
+
return "查無此分子名稱或CAS No.", "", "", None
|
| 67 |
+
|
| 68 |
if mol_type == 'polymer':
|
| 69 |
selfies_str = "(不支援聚合物片段 SMILES 轉 SELFIES)"
|
| 70 |
msg = f"{smiles} (聚合物重複單元 SMILES)"
|
| 71 |
+
monomer_msg = f"對應單體(monomer)名稱:{monomer_name}"
|
| 72 |
+
|
| 73 |
+
# 查單體 SMILES 與結構圖
|
| 74 |
+
monomer_smiles = ""
|
| 75 |
img = None
|
| 76 |
if monomer_name:
|
| 77 |
try:
|
|
|
|
| 83 |
if mol:
|
| 84 |
img = Draw.MolToImage(mol, size=(300,200))
|
| 85 |
except Exception:
|
| 86 |
+
monomer_smiles = "(查無單體 SMILES)"
|
| 87 |
+
extra_msg = f"{monomer_msg}\n單體 SMILES: {monomer_smiles if monomer_smiles else '(查無)'}"
|
| 88 |
+
return f"{msg}\n\n{extra_msg}", "(聚合物:無標準 InChI)", selfies_str, img
|
|
|
|
| 89 |
else:
|
| 90 |
try:
|
| 91 |
inchi = Chem.MolToInchi(Chem.MolFromSmiles(smiles))
|
|
|
|
| 100 |
img = Draw.MolToImage(mol, size=(300,200)) if mol else None
|
| 101 |
except Exception:
|
| 102 |
img = None
|
| 103 |
+
return smiles, inchi, selfies_str, img
|
|
|
|
|
|
|
|
|
|
| 104 |
except Exception as e:
|
| 105 |
+
return f"系統錯誤: {e}", "錯誤", "錯誤", None
|
| 106 |
|
| 107 |
iface = gr.Interface(
|
| 108 |
fn=query_molecule,
|
|
|
|
| 111 |
gr.Textbox(label="SMILES(或聚合物重複單元)"),
|
| 112 |
gr.Textbox(label="InChI"),
|
| 113 |
gr.Textbox(label="SELFIES"),
|
|
|
|
| 114 |
gr.Image(type="pil", label="結構圖或單體結構圖"),
|
| 115 |
],
|
| 116 |
+
title="分子/聚合物名稱/CAS號查詢互動展示",
|
| 117 |
+
description=(
|
| 118 |
+
"輸入有機分子名稱、聚合物名稱,或 CAS No.,可顯示單體或聚合物重複單元 SMILES/SELFIES。\n"
|
| 119 |
+
"聚合物僅顯示重複單元(不支援 SELFIES),自動顯示單體資訊。"
|
| 120 |
+
)
|
| 121 |
)
|
| 122 |
|
| 123 |
iface.launch()
|