# Monomerizer / app.py
# Playingyoyo's picture
# Update app.py
# aff43ab verified
import os
# Switch Gradio's MCP server on through its environment flag.
os.environ["GRADIO_MCP_SERVER"] = "True"

# Work around a gradio_client schema bug: both helpers below are wrapped so
# that a non-dict schema is rendered with str() instead of being handed to the
# original implementation. Dict schemas go through untouched, so the wrappers
# are harmless once the library is fixed.
try:
    import gradio_client.utils as _gc_utils

    _orig_gt = _gc_utils.get_type

    def _p_gt(s):
        """Delegate dict schemas to the original get_type; stringify the rest."""
        if isinstance(s, dict):
            return _orig_gt(s)
        return str(s)

    _gc_utils.get_type = _p_gt

    _orig_js = _gc_utils._json_schema_to_python_type

    def _p_js(s, d=None):
        """Delegate dict schemas to the original converter; stringify the rest."""
        if isinstance(s, dict):
            return _orig_js(s, d)
        return str(s)

    _gc_utils._json_schema_to_python_type = _p_js
except Exception:
    # Best effort only: the app must still start if the patch cannot be applied.
    pass
import gradio as gr
import subprocess, tempfile, os, glob, shutil, base64, json, re, csv, ast
from PIL import Image
from css import FULL_CSS, BUBBLES_HTML
from theme import create_theme
# Optional dependency: CairoSVG is the preferred SVG→PNG converter.
# ImportError means the package is not installed; OSError is what the import
# raises when the package is installed but the native Cairo library cannot be
# loaded. Either way the app keeps running without it.
try:
    import cairosvg
except (ImportError, OSError):
    HAS_CAIROSVG = False
else:
    HAS_CAIROSVG = True
# ── Helpers ──
def load_legend_html():
    """Return an HTML snippet embedding ``legend.png`` as a base64 data URI.

    The image is looked up in the directory containing this module.
    An empty string is returned when the file does not exist.
    """
    legend_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "legend.png")
    if not os.path.exists(legend_path):
        return ""
    with open(legend_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return (
        '<div style="display:flex;align-items:flex-end;justify-content:center;height:100%;">'
        f'<img src="data:image/png;base64,{encoded}" style="max-width:100%;">'
        '</div>'
    )
def load_background_css():
    """Return a CSS rule that uses ``background.png`` as the page background.

    The image is looked up in the directory containing this module and inlined
    as a base64 data URI. An empty string is returned when the file is missing.
    """
    bg_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "background.png")
    if not os.path.exists(bg_path):
        return ""
    with open(bg_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    # Assembled line by line; the result starts and ends with a newline.
    return (
        "\n"
        "body, .gradio-container {\n"
        f"background-image: url('data:image/png;base64,{encoded}') !important;\n"
        "background-size: cover !important;\n"
        "background-position: center !important;\n"
        "background-attachment: fixed !important;\n"
        "}\n"
    )
def load_example_image():
    """Return the HTML shown in the visualization pane before the first run.

    When ``example.png`` exists next to this module it is inlined as a base64
    ``<img>``; otherwise a short grey placeholder message is returned.
    """
    example_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "example.png")
    if not os.path.exists(example_path):
        return "<p style='color:#bbb;text-align:center;padding:12px;font-size:12px;'>Run to see visualizations</p>"
    with open(example_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return (
        '<div style="text-align:center;padding:4px;">'
        f'<img src="data:image/png;base64,{encoded}" style="max-width:100%;height:auto;">'
        '</div>'
    )
def load_dictionary():
    """Build the monomer-ID → display-name table from ``dictionary.txt``.

    The file is a tab-separated table next to this module with the columns
    ``ID``, ``Pubchem``, ``ChEMBL`` and ``SMILES``. For every row with an ID
    the name is chosen in this order:

    1. the ``Pubchem`` value, unless it is empty or a NULL marker;
    2. the first usable entry of the ``ChEMBL`` value when it parses as a
       Python list literal;
    3. the ``SMILES`` value;
    4. the ID itself.

    Returns:
        dict mapping ID to the chosen name; empty when the file is missing.
    """
    dp = os.path.join(os.path.dirname(os.path.abspath(__file__)), "dictionary.txt")
    m = {}
    if not os.path.exists(dp):
        return m

    null_markers = ("NULL", "['NULL']")
    with open(dp, "r", newline="") as f:
        for r in csv.DictReader(f, delimiter="\t"):
            # DictReader fills the missing cells of a short row with None, so
            # every cell is normalised with `or ""` before stripping.
            aid = (r.get("ID") or "").strip()
            if not aid:
                continue
            pub = (r.get("Pubchem") or "").strip()
            chm = (r.get("ChEMBL") or "").strip()
            smi = (r.get("SMILES") or "").strip()
            fallback = smi if smi else aid

            if pub and pub not in null_markers:
                m[aid] = pub
                continue

            name = None
            if chm and chm not in null_markers:
                try:
                    parsed = ast.literal_eval(chm)
                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                    # Not a Python literal: fall through to SMILES / ID.
                    parsed = None
                if isinstance(parsed, list):
                    name = next((c for c in parsed if c and c not in null_markers), None)
            m[aid] = name if name else fallback
    return m
# One token is, in order of preference: X or Z followed by digits and an
# optional "ter" suffix, the literal "[UNK]", or a single character out of
# A-W, Y or "?".
_SEQUENCE_TOKEN = re.compile(r'[XZ]\d+(?:ter)?|\[UNK\]|[A-WY?]')


def tokenize_sequence(seq):
    """Split a sequence string into its monomer tokens.

    Characters that do not belong to any token are skipped silently.
    Returns the tokens as a list of strings, in order of appearance.
    """
    return _SEQUENCE_TOKEN.findall(seq)
def _read_tsv_pairs(path, key_field, value_field):
    """Return the (key, value) column pairs of a tab-separated file, in order.

    Rows where either cell is empty or absent are dropped. A missing file
    yields an empty list.
    """
    if not os.path.exists(path):
        return []
    pairs = []
    with open(path, "r", newline="") as f:
        for row in csv.DictReader(f, delimiter="\t"):
            # Short rows come back with None cells; normalise before stripping.
            key = (row.get(key_field) or "").strip()
            value = (row.get(value_field) or "").strip()
            if key and value:
                pairs.append((key, value))
    return pairs


def format_results(output_dir, id_to_name):
    """Turn the pipeline's output files into two display strings.

    Files read (all optional, all tab-separated):
        ``<output_dir>/nc_raw2standard.txt``                 raw_ID → standard_ID
        ``<output_dir>/standard/sequences_standardized.txt`` ID, SEQUENCE
        ``<output_dir>/standard/nc_standardized.txt``        ID → SMILES

    Args:
        output_dir: directory the pipeline wrote into.
        id_to_name: mapping of monomer ID to display name.

    Returns:
        ``(mapping_text, sequences_text)``. ``mapping_text`` has one
        ``"<id> → <name>"`` line per mapped monomer; ``sequences_text`` has
        one ``"<molecule id>: <name> --- <name> ..."`` line per sequence.
        Each falls back to a fixed message when there is nothing to show.
    """
    standard_dir = os.path.join(output_dir, "standard")
    raw2std = dict(_read_tsv_pairs(
        os.path.join(output_dir, "nc_raw2standard.txt"), "raw_ID", "standard_ID"))
    sequences = _read_tsv_pairs(
        os.path.join(standard_dir, "sequences_standardized.txt"), "ID", "SEQUENCE")
    std_smi = dict(_read_tsv_pairs(
        os.path.join(standard_dir, "nc_standardized.txt"), "ID", "SMILES"))

    def resolve(sid):
        """Prefer the dictionary name, then the standardized SMILES, then the ID."""
        if sid in id_to_name:
            return id_to_name[sid]
        if sid in std_smi:
            return std_smi[sid]
        return sid

    # Mapping: raw IDs first, sorted, skipping those standardized to [UNK].
    lines, seen = [], set()
    for ri, si in sorted(raw2std.items()):
        if si != "[UNK]":
            lines.append(f"{ri} → {resolve(si)}")
            seen.add(si)

    # Then every X*/Z* token used in a sequence that the raw table did not cover.
    all_ids = set()
    for _, sq in sequences:
        for t in tokenize_sequence(sq):
            if t[0] in "XZ":
                all_ids.add(t)
    for si in sorted(all_ids):
        if si not in seen:
            lines.append(f"{si} → {resolve(si)}")
    mt = "\n".join(lines) if lines else "No non-canonical amino acids found."

    # Sequences: every token replaced by its display name.
    slines = []
    for mi, sq in sequences:
        named = [
            resolve(t) if t[0] in "XZ"
            else ("Unknown" if t in ("?", "[UNK]") else id_to_name.get(t, t))
            for t in tokenize_sequence(sq)
        ]
        slines.append(f"{mi}: {' --- '.join(named)}")
    st = "\n".join(slines) if slines else "No sequences generated."
    return mt, st
# ── Image helpers ──
def svg_to_png(svg_path, png_path, scale=2):
    """Convert an SVG file to PNG, trying several converters in turn.

    Order: CairoSVG (when importable), then the ``inkscape`` CLI, then
    ``rsvg-convert``. A converter that is missing, times out (30 s) or exits
    with a non-zero status does not end the search; the next one is tried.

    Args:
        svg_path: path of the SVG to read.
        png_path: path the PNG is written to.
        scale: zoom factor (CairoSVG ``scale``, Inkscape DPI ``96 * scale``,
            rsvg-convert ``-z``).

    Returns:
        True as soon as one converter succeeds, False when all of them fail.
    """
    if HAS_CAIROSVG:
        try:
            cairosvg.svg2png(url=svg_path, write_to=png_path, scale=scale)
            return True
        except Exception:
            # Any CairoSVG failure just hands over to the CLI fallbacks.
            pass

    cli_fallbacks = (
        ["inkscape", svg_path, "--export-type=png", f"--export-filename={png_path}",
         f"--export-dpi={96 * scale}"],
        ["rsvg-convert", "-z", str(scale), "-o", png_path, svg_path],
    )
    for cmd in cli_fallbacks:
        try:
            proc = subprocess.run(cmd, capture_output=True, timeout=30)
        except (OSError, ValueError, subprocess.SubprocessError):
            # Tool not installed, bad argument, or timeout: try the next tool.
            continue
        # The exit status is checked as well as the file, because a PNG left
        # at png_path by an earlier run would otherwise look like success.
        if proc.returncode == 0 and os.path.exists(png_path):
            return True
    return False
def stitch_images(image_paths, padding=20, bg_color="white"):
    """Stack images top-to-bottom into a single PNG.

    Every readable image is centred horizontally on a canvas as wide as the
    widest one, with ``padding`` pixels between neighbours. Unreadable paths
    are skipped. The result is flattened onto ``bg_color`` and saved as
    ``/tmp/monomerizer_composite.png``.

    Returns:
        The output path, or None when no image could be opened.
    """
    frames = []
    for path in image_paths:
        try:
            frames.append(Image.open(path).convert("RGBA"))
        except Exception:
            # Best effort: one bad file must not abort the composite.
            continue
    if not frames:
        return None

    canvas_w = max(frame.width for frame in frames)
    canvas_h = sum(frame.height for frame in frames) + padding * (len(frames) - 1)
    canvas = Image.new("RGBA", (canvas_w, canvas_h), bg_color)

    top = 0
    for frame in frames:
        left = (canvas_w - frame.width) // 2  # centre horizontally
        canvas.paste(frame, (left, top), frame)
        top += frame.height + padding

    # Drop the alpha channel by pasting onto an opaque background.
    alpha = canvas.split()[3]
    flattened = Image.new("RGB", canvas.size, bg_color)
    flattened.paste(canvas, mask=alpha)

    out = os.path.join("/tmp", "monomerizer_composite.png")
    flattened.save(out, "PNG")
    return out
def collect_and_stitch(output_dir):
    """Build one composite PNG from every molecule image under ``output_dir``.

    SVGs (except files whose name contains "legend") are converted to PNG
    first; PNGs already produced by the pipeline are appended after them.
    Both groups are taken in sorted path order.

    Converted PNGs live in a private temporary directory that is removed once
    the composite has been written, so nothing is shared between runs. The
    pipeline's own PNGs are read in place rather than copied, so two files
    with the same name in different sub-folders can no longer overwrite each
    other.

    Returns:
        Path of the composite PNG, or None when there is nothing to stitch.
    """
    svg_files = sorted(
        f for f in glob.glob(os.path.join(output_dir, "**", "*.svg"), recursive=True)
        if "legend" not in os.path.basename(f).lower()
    )
    png_files = sorted(glob.glob(os.path.join(output_dir, "**", "*.png"), recursive=True))
    if not svg_files and not png_files:
        return None

    with tempfile.TemporaryDirectory(prefix="mono_stitch_") as workdir:
        to_stitch = []
        for i, svg in enumerate(svg_files, 1):
            converted = os.path.join(workdir, f"mono_converted_{i}.png")
            if svg_to_png(svg, converted):
                to_stitch.append(converted)
        to_stitch.extend(png_files)
        if not to_stitch:
            return None
        # stitch_images reads every input and writes the composite before it
        # returns, so the work directory can be discarded on exit.
        return stitch_images(to_stitch)
# ── Core pipeline runner (shared by UI and MCP) ──
def _run_pipeline(smiles_text):
"""Run the Monomerizer pipeline on SMILES input.
Returns (mapping, sequences, output_dir) or raises."""
if not smiles_text or not smiles_text.strip():
raise ValueError("No input provided.")
lines = [l.strip() for l in smiles_text.strip().split("\n") if l.strip()]
if lines and lines[0].upper() == "SMILES":
lines = lines[1:]
if not lines:
raise ValueError("No SMILES found in input.")
tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, dir="/tmp")
tmp.write("SMILES\n")
for l in lines:
tmp.write(l + "\n")
tmp.close()
od = tempfile.mkdtemp(prefix="mono_", dir="/tmp")
res = subprocess.run(
["python3", "run_pipeline.py", "--input_file", tmp.name, "--output_dir", od, "-draw"],
capture_output=True, text=True, timeout=600,
cwd=os.path.dirname(os.path.abspath(__file__)),
)
idn = load_dictionary()
mt, st = format_results(od, idn)
if res.returncode != 0 and res.stderr:
mt = f"Pipeline warning:\n{res.stderr}\n\n{mt}"
return mt, st, od
# ══════════════════════════════════════════════════════
# MCP API function — returns text + single composite image
# ══════════════════════════════════════════════════════
def analyze_smiles(smiles_text: str) -> tuple[str, str | None]:
    """Decompose peptide/peptidomimetic SMILES into amino acid sequences using Monomerizer.

    Takes one or more SMILES strings (one per line) representing peptides or
    peptidomimetics (including those with non-canonical amino acids), and returns:
    - A mapping of non-canonical amino acid IDs to their chemical names
    - Named amino acid sequences for each input molecule
    - A composite image of all molecular structure visualizations

    Args:
        smiles_text: One or more SMILES strings separated by newlines.

    Returns:
        A tuple of (formatted text results, path to composite visualization image).
        The text contains the non-canonical amino acid mapping and sequences.
        The image shows all molecular structures stitched vertically.
    """
    # NOTE: the docstring above is kept word for word, because Gradio's MCP
    # server publishes it as the tool description.
    try:
        mapping_text, sequence_text, out_dir = _run_pipeline(smiles_text)
    except ValueError as e:
        # Input validation problems are reported as plain text.
        return str(e), None
    except subprocess.TimeoutExpired:
        return "Pipeline timed out after 10 minutes.", None
    except Exception as e:
        return f"Error: {e}", None

    # Text part of the response, one section per heading.
    sections = [
        "═══ Non-canonical Amino Acid Mapping ═══",
        mapping_text,
        "",
        "═══ Sequences ═══",
        sequence_text,
    ]

    # Image part: every visualization stitched into one PNG (may be None).
    composite_path = collect_and_stitch(out_dir)
    if composite_path:
        sections.append("\n═══ Molecular visualization returned as image ═══")
    return "\n".join(sections), composite_path
# ── UI pipeline function ──
def run_monomerizer(smiles_text):
    """Run the pipeline for the web UI and prepare the image carousel.

    Returns a 6-tuple matching the outputs of the Run button:
        mapping text, sequences text, HTML of the first visualization,
        counter text (``"1 / N"`` or ``"0 / 0"``), JSON list of the HTML of
        every visualization, JSON list of downloadable SVG paths.

    The two JSON lists are index-aligned for the SVG entries: position *k* of
    the path list belongs to position *k* of the HTML list, and is ``null``
    when that SVG could not be staged for download. PNG visualizations follow
    the SVGs and have no download path.
    """
    try:
        mt, st, od = _run_pipeline(smiles_text)
    except ValueError as e:
        return str(e), "", "", "0 / 0", "[]", "[]"
    except subprocess.TimeoutExpired:
        return "⚠️ Timed out.", "", "", "0 / 0", "[]", "[]"
    except Exception as e:
        return f"❌ {e}", "", "", "0 / 0", "[]", "[]"

    svgs = sorted([f for f in glob.glob(os.path.join(od, "**", "*.svg"), recursive=True)
                   if "legend" not in os.path.basename(f).lower()])
    pngs = sorted(glob.glob(os.path.join(od, "**", "*.png"), recursive=True))

    imgs, spaths = [], []
    # Download copies go into a directory private to this run, so another
    # session cannot overwrite the file a user is about to save.
    download_dir = tempfile.mkdtemp(prefix="mono_svg_", dir="/tmp") if svgs else None

    for i, sf in enumerate(svgs, 1):
        try:
            with open(sf, "r") as f:
                sc = f.read()
        except (OSError, UnicodeError):
            # Unreadable SVG: leave it out of the carousel entirely.
            continue
        imgs.append(f'<div style="text-align:center;padding:6px;"><p style="font-size:13px;font-weight:bold;color:#555;margin-bottom:4px;">{i}</p><div>{sc}</div></div>')
        sp = os.path.join(download_dir, f"mono_svg_{i}.svg")
        try:
            shutil.copy2(sf, sp)
        except OSError:
            # Still show the image; it just has nothing to download.
            sp = None
        spaths.append(sp)

    for i, pf in enumerate(pngs, len(svgs) + 1):
        try:
            with open(pf, "rb") as f:
                b = base64.b64encode(f.read()).decode()
        except OSError:
            continue
        imgs.append(f'<div style="text-align:center;padding:6px;"><p style="font-size:13px;font-weight:bold;color:#555;margin-bottom:4px;">{i}</p><img src="data:image/png;base64,{b}" style="max-width:100%;"></div>')

    ij, sj = json.dumps(imgs), json.dumps(spaths)
    if imgs:
        return mt, st, imgs[0], f"1 / {len(imgs)}", ij, sj
    return mt, st, "<p style='color:#bbb;text-align:center;'>No visualizations.</p>", "0 / 0", ij, sj
def navigate(direction, ij, sj, ctr):
    """Step the visualization carousel one position, wrapping at both ends.

    Args:
        direction: ``"next"`` moves forward; any other value moves backward.
        ij: JSON list with the HTML of every visualization.
        sj: JSON list with the downloadable SVG path of each visualization.
        ctr: current counter text in the form ``"<position> / <total>"``.

    Returns:
        ``(html, counter_text, svg_path)`` for the new position. ``svg_path``
        is None when the position has no SVG. Undecodable state yields
        ``("", "0 / 0", None)``; an empty image list yields the
        "No visualizations." placeholder.
    """
    try:
        imgs = json.loads(ij) if ij else []
        sp = json.loads(sj) if sj else []
    except (ValueError, TypeError):
        return "", "0 / 0", None
    if not imgs:
        return "<p style='color:#bbb;text-align:center;'>No visualizations.</p>", "0 / 0", None

    try:
        cur = int(ctr.split("/")[0].strip()) - 1
    except (AttributeError, ValueError):
        # Unparseable counter: treat the first image as the current one.
        cur = 0

    t = len(imgs)
    cur = (cur + (1 if direction == "next" else -1)) % t
    return imgs[cur], f"{cur+1} / {t}", sp[cur] if cur < len(sp) else None
def get_svg(sj, ctr):
    """Return the SVG path belonging to the visualization currently shown.

    Args:
        sj: JSON list of SVG paths, index-aligned with the carousel.
        ctr: counter text in the form ``"<position> / <total>"``.

    Returns:
        The path at the current position, or None when the state cannot be
        decoded or the position has no SVG.
    """
    try:
        sp = json.loads(sj) if sj else []
        cur = int(ctr.split("/")[0].strip()) - 1
        if 0 <= cur < len(sp):
            return sp[cur]
    except (AttributeError, TypeError, ValueError, LookupError):
        # Malformed UI state simply means there is nothing to download.
        pass
    return None
# ── Defaults ──
# Content of the visualization pane before the first run.
example_svg_html = load_example_image()

# Three sample molecules pre-filled in the input box, one SMILES per line.
# The first is a raw string because it contains a literal backslash.
example_smiles = "\n".join([
    r"NC(=O)[C@@H]1C[C@H](NC(=O)C(F)(F)F)CN1C(=O)[C@H](N)CCC/N=C(\N)N[N+](=O)[O-]",
    "CC[C@H](C)[C@H](NC(=O)[C@H](C)NC(=O)[C@H](CCCNC(=N)N)NC(=O)OCc1ccccc1)[C@@H](O)CC(=O)NC1CCCCC1",
    "CC(C)C[C@H](NC(=O)[C@@H]1CCCN1C(=O)[C@H](N)Cc1ccccc1)C(=O)NCC(=O)N1CCC[C@H]1C(=O)NCC(=O)Nc1ccc(N(CCCl)CCCl)cc1",
])
# ── UI ──
# Static markup for the page header and footer. Kept at module level so the
# HTML can be written flush-left without changing the emitted text.
_HEADER_HTML = """
<div style="text-align:center;padding:10px 0 4px 0;">
<h1 style="margin:0;font-size:36px;color:#fff;text-shadow:0 3px 12px rgba(0,0,0,0.4);">🧬 Monomerizer</h1>
<p style="margin:4px 0 0 0;font-size:15px;color:rgba(255,255,255,0.95);text-shadow:0 2px 8px rgba(0,0,0,0.4);">
Convert SMILES to amino acid sequences &nbsp;
<a href="https://chemrxiv.org/engage/chemrxiv/article-details/67ecf39181d2151a02aad52a" target="_blank" style="color:#fff;font-weight:700;">📄 Paper</a> ·
<a href="https://github.com/tsudalab/Monomerizer" target="_blank" style="color:#fff;font-weight:700;">💻 GitHub</a> ·
<a href="https://huggingface.co/Playingyoyo/GPepT" target="_blank" style="color:#fff;font-weight:700;">🤖 GPepT</a>
</p>
<p style="margin:3px 0 0 0;font-size:13px;color:rgba(255,255,255,0.85);text-shadow:0 1px 6px rgba(0,0,0,0.4);font-style:italic;">
Also works with peptidomimetics containing non-canonical amino acids
</p>
</div>
"""

_FOOTER_HTML = """<div class="footer-note">
⚠️ Labels are specific to this batch. Run on
<a href="https://github.com/tsudalab/Monomerizer" target="_blank">GitHub</a>
to make IDs compatible with our foundation model
<a href="https://huggingface.co/Playingyoyo/GPepT" target="_blank">GPepT</a>.
</div>"""

with gr.Blocks(title="Monomerizer", theme=create_theme(), css=FULL_CSS + load_background_css()) as demo:
    # Carousel state, stored as JSON text: the HTML of every visualization and
    # the matching downloadable SVG paths.
    images_state = gr.State("")
    svg_paths_state = gr.State("")

    # Header
    gr.HTML(_HEADER_HTML)

    # Input
    gr.HTML('<p class="section-label">⬇ Input</p>')
    with gr.Group(elem_classes=["input-card"]):
        with gr.Row():
            smiles_input = gr.Textbox(
                label="SMILES (one per line. Delete examples to enter yours)", lines=2, max_lines=4,
                placeholder="Paste SMILES here, one per line...",
                value=example_smiles, elem_classes=["smiles-input"], scale=4,
            )
            run_btn = gr.Button("🚀 Run", variant="primary", size="lg", elem_classes=["run-btn"], scale=1)

    # Output
    gr.HTML('<p class="section-label">⬆ Output</p>')
    with gr.Group(elem_classes=["viz-card"]):
        with gr.Row():
            # Left: carousel controls above the current visualization.
            with gr.Column(scale=4):
                with gr.Row():
                    prev_btn = gr.Button("◀", size="sm", scale=1, min_width=40, elem_classes=["nav-btn"])
                    counter_display = gr.Textbox(value="0 / 0", show_label=False, interactive=False, scale=1, container=False, elem_classes=["compact-text"])
                    next_btn = gr.Button("▶", size="sm", scale=1, min_width=40, elem_classes=["nav-btn"])
                    save_svg_btn = gr.Button("💾 Save", size="sm", scale=1, min_width=70, elem_classes=["save-btn"])
                output_html = gr.HTML(value=example_svg_html)
                # Hidden until the Save button is used.
                svg_download = gr.File(label="Download SVG", visible=False)
            # Right: colour legend, only when legend.png is available.
            with gr.Column(scale=1):
                lh = load_legend_html()
                if lh:
                    gr.HTML(lh)
    with gr.Group(elem_classes=["output-card"]):
        with gr.Row():
            output_mapping = gr.Textbox(label="Non-canonical Amino Acid Mapping", lines=4, show_copy_button=True, elem_classes=["compact-text"])
            output_sequences = gr.Textbox(label="Sequences", lines=4, show_copy_button=True, elem_classes=["compact-text"])

    # Footer
    gr.HTML(_FOOTER_HTML)

    # ══════════════════════════════════════════════════
    # MCP API ENDPOINT — returns text + composite image
    # gr.Image serializes as a native image for MCP clients
    # ══════════════════════════════════════════════════
    # Invisible components that exist only to expose analyze_smiles by name.
    with gr.Row(visible=False):
        api_input = gr.Textbox()
        api_output = gr.Textbox()
        api_image_output = gr.Image(type="filepath")
        api_btn = gr.Button()
    api_btn.click(
        fn=analyze_smiles,
        inputs=[api_input],
        outputs=[api_output, api_image_output],
        api_name="analyze_smiles",
    )

    # ── UI Events (all hidden from MCP) ──
    run_btn.click(
        fn=run_monomerizer,
        inputs=[smiles_input],
        outputs=[output_mapping, output_sequences, output_html, counter_display, images_state, svg_paths_state],
        api_name=False,
    )
    prev_btn.click(
        fn=lambda i, s, c: navigate("prev", i, s, c),
        inputs=[images_state, svg_paths_state, counter_display],
        outputs=[output_html, counter_display, svg_download],
        api_name=False,
    )
    next_btn.click(
        fn=lambda i, s, c: navigate("next", i, s, c),
        inputs=[images_state, svg_paths_state, counter_display],
        outputs=[output_html, counter_display, svg_download],
        api_name=False,
    )
    # Save: load the current SVG into the file component, then reveal it.
    save_svg_btn.click(
        fn=get_svg,
        inputs=[svg_paths_state, counter_display],
        outputs=[svg_download],
        api_name=False,
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[svg_download],
        api_name=False,
    )
if __name__ == "__main__":
    # Listen on every interface at port 7860, with the MCP server switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "mcp_server": True,
    }
    demo.launch(**launch_options)